diff --git a/CMakeLists.txt b/CMakeLists.txt index 608a6ce..b4b896b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,17 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")# Enable all features your current CPU supports +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native") +# Force AVX2 +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mavx2") +# Or force AVX-512 +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mavx512f") + + + + option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) # For larger projects @@ -149,4 +160,5 @@ target_link_libraries(${PROJECT_NAME} nvdsgst_infer nvds_meta nvds_inferutils nvdsgst_meta nvds_utils nvdsgst_helper prometheus-cpp-core prometheus-cpp-pull # prometheus-cpp-exposer nvdsgst_metnvdsa microhttpd - nvdsgst_nvmultiurisrcbin) \ No newline at end of file + nvdsgst_nvmultiurisrcbin + nvds_batch_jpegenc) \ No newline at end of file diff --git a/data/configuration.json b/data/configuration.json index 3af48d5..d4c0c28 100644 --- a/data/configuration.json +++ b/data/configuration.json @@ -65,5 +65,6 @@ "redis_broker_host": "localhost", "redis_broker_port": 6379, "topic_redis": "redis_stream" - } + }, + "compression_coefficient": 0.125 } \ No newline at end of file diff --git a/src/face_nv_infer_server_manager.cpp b/src/face_nv_infer_server_manager.cpp index 1b3c3d0..83f446c 100644 --- a/src/face_nv_infer_server_manager.cpp +++ b/src/face_nv_infer_server_manager.cpp @@ -45,7 +45,7 @@ FaceNvInferServerManager::FaceNvInferServerManager() { // FACE_NET_HEIGHT = config["PGIE_NET_HEIGHT"]; MUXER_OUTPUT_WIDTH = config["MUXER_OUTPUT_WIDTH"]; MUXER_OUTPUT_HEIGHT = config["MUXER_OUTPUT_HEIGHT"]; - threshold_face_detection = config["threshold_body_detection"]; + threshold_face_detection = 
config["threshold_face_detection"];
 }
 
 bool FaceNvInferServerManager::create_face_nv_infer_server(int num_sources) {
@@ -1148,6 +1148,84 @@ void FaceNvInferServerManager::add_face_body(int object_id, float face_score) {
     return;
 }
 
+/**
+ * @brief Checks whether every element of a float array is zero, using
+ *        256-bit AVX loads.
+ *
+ * Uses AVX intrinsics unconditionally, so this function only builds when
+ * AVX code generation is enabled. NaN elements count as non-zero.
+ *
+ * @param data Pointer to the first float; not dereferenced when size is 0.
+ * @param size Number of floats to inspect.
+ * @return true if all elements compare equal to zero (vacuously true for
+ *         size 0), false otherwise.
+ */
+bool FaceNvInferServerManager::allZeroAVX(const float *data, size_t size) {
+    size_t i = 0;
+    const __m256 zero = _mm256_setzero_ps();  // 8 lanes of 0.0f
+    for (; i + 8 <= size; i += 8) {
+        const __m256 v = _mm256_loadu_ps(&data[i]);  // unaligned load, 8 floats
+        // Unordered predicate: a NaN lane must count as "not zero", exactly
+        // like the scalar `!=` below. _CMP_NEQ_OQ would report NaN as equal.
+        const __m256 cmp = _mm256_cmp_ps(v, zero, _CMP_NEQ_UQ);
+        if (_mm256_movemask_ps(cmp)) return false;  // some lane is non-zero
+    }
+    // Scalar pass over the remaining (fewer than 8) elements.
+    for (; i < size; ++i) {
+        if (data[i] != 0.0f) return false;
+    }
+    return true;
+}
+
+/**
+ * @brief Checks whether every element of a float array is zero, using the
+ *        widest SIMD instruction set selected at compile time.
+ *
+ * Picks AVX-512F, AVX or SSE via predefined macros and falls back to a plain
+ * scalar loop when none is defined. Every path treats NaN as non-zero, so
+ * the result does not depend on which path was compiled in.
+ *
+ * @param data Pointer to the first float; not dereferenced when size is 0.
+ * @param size Number of floats to inspect.
+ * @return true if all elements compare equal to zero (vacuously true for
+ *         size 0), false otherwise.
+ */
+bool FaceNvInferServerManager::allZero(const float *data, size_t size) {
+    size_t i = 0;
+
+#if defined(__AVX512F__)
+    // 16 floats per iteration
+    const __m512 zero512 = _mm512_setzero_ps();
+    for (; i + 16 <= size; i += 16) {
+        const __m512 v = _mm512_loadu_ps(&data[i]);
+        const __mmask16 cmp = _mm512_cmp_ps_mask(v, zero512, _CMP_NEQ_UQ);
+        if (cmp) return false;
+    }
+#elif defined(__AVX__)
+    // 8 floats per iteration; _mm256_cmp_ps needs AVX only, not AVX2
+    const __m256 zero256 = _mm256_setzero_ps();
+    for (; i + 8 <= size; i += 8) {
+        const __m256 v = _mm256_loadu_ps(&data[i]);
+        const __m256 cmp = _mm256_cmp_ps(v, zero256, _CMP_NEQ_UQ);
+        if (_mm256_movemask_ps(cmp)) return false;
+    }
+#elif defined(__SSE__)
+    // 4 floats per iteration; _mm_cmpneq_ps is already an unordered compare
+    const __m128 zero128 = _mm_setzero_ps();
+    for (; i + 4 <= size; i += 4) {
+        const __m128 v = _mm_loadu_ps(&data[i]);
+        const __m128 cmp = _mm_cmpneq_ps(v, zero128);
+        if (_mm_movemask_ps(cmp)) return false;
+    }
+#endif
+
+    // Scalar pass: the whole array when no SIMD path exists, else the tail.
+    for (; i < size; ++i) {
+        if (data[i] != 0.0f) return false;
+    }
+    return true;
+}
+
 /* This is the buffer probe function that we have registered on the sink pad
  * of the tiler element.
All SGIE infer elements in the pipeline shall attach * their NvDsInferTensorMeta to each object's metadata of each frame, here we @@ -1156,11 +1208,22 @@ void FaceNvInferServerManager::add_face_body(int object_id, float face_score) { * metadata. */ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe( - GstPad *pad, GstPadProbeInfo *info, gpointer u_data) { + GstPad *pad, GstPadProbeInfo *info, gpointer ctx) { + (void)ctx; + GstBuffer *buf = (GstBuffer *)info->data; + GstMapInfo inmap = GST_MAP_INFO_INIT; + if (!gst_buffer_map(buf, &inmap, GST_MAP_READ)) { + GST_ERROR("input buffer mapinfo failed"); + return GST_PAD_PROBE_DROP; + } + NvBufSurface *ip_surf = (NvBufSurface *)inmap.data; + gst_buffer_unmap(buf, &inmap); + (void)ip_surf; + (void)pad; // static guint use_device_mem = 0; - gboolean *use_new_mux = (gboolean *)u_data; - (void)use_new_mux; + // gboolean *use_new_mux = (gboolean *)u_data; + // (void)use_new_mux; static NvDsInferNetworkInfo networkInfo{FACE_NET_WIDTH, FACE_NET_HEIGHT, 3}; // (void)networkInfo; @@ -1168,6 +1231,7 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe( NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta(GST_BUFFER(info->data)); if (!batch_meta) return GST_PAD_PROBE_OK; + bool is_zero_embedding_vector; /* Iterate each frame metadata in batch */ for (NvDsMetaList *l_frame = batch_meta->frame_meta_list; l_frame != NULL; @@ -1280,8 +1344,7 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe( unsigned int numDims = layer.inferDims.numDims; unsigned int numElements = layer.inferDims.numElements; - // (void)numElements; - // (void)numDims; + (void)numElements; // std::cout << "Layer " << jkl << " (" << layer.layerName // << "):\n"; @@ -1298,11 +1361,12 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe( float *data_face = static_cast(layer.buffer); if (!(strcmp(layer.layerName, "embedding") == 0)) { - for (unsigned int xyz = 0; xyz < numElements; xyz++) { - // 
std::cout << "data_face[" << xyz - // << "]= " << data_face[xyz] << - // std::endl; - } + // for (unsigned int xyz = 0; xyz < numElements; xyz++) + // { + // std::cout << "data_face[" << xyz + // << "]= " << data_face[xyz] << + // std::endl; + // } if ((strcmp(layer.layerName, "bbox") == 0)) { for (int l = 0; l < 4; l++) { face_location[l] = data_face[l]; @@ -1313,21 +1377,27 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe( } if ((strcmp(layer.layerName, "score") == 0)) { score_face = data_face[0]; - // std::cout << "score_face= " << score_face << - // std::endl; if (score_face>0.9){ + // std::cout << "score_face= " << score_face + // <0.9){ // high_confidence_faces++; // std::cout << "high_confidence_faces= " << // high_confidence_faces << std::endl; // std::quick_exit(0); // } } + } else { + is_zero_embedding_vector = allZero(data_face, 512); + // std::cout<<"is_zero_embedding_vector = + // "< // for AVX intrinsics + +#include "config_manager.hpp" +#include "custom_gstnvdsinfer.hpp" #include "nvdsmeta.h" #include "nvdsmeta_schema.h" -#include "custom_gstnvdsinfer.hpp" -#include "config_manager.hpp" - class FaceNvInferServerManager { private: @@ -51,8 +52,7 @@ class FaceNvInferServerManager { // static gpointer copy_user_meta(gpointer, gpointer); // static void release_user_meta(gpointer, gpointer); - static GstPadProbeReturn sgie_pad_buffer_probe(GstPad *, GstPadProbeInfo - *, + static GstPadProbeReturn sgie_pad_buffer_probe(GstPad *, GstPadProbeInfo *, gpointer); // static GstPadProbeReturn osd_sink_pad_buffer_probe_new(GstPad *, // GstPadProbeInfo @@ -60,7 +60,9 @@ class FaceNvInferServerManager { static void *set_metadata_ptr(float *); static gpointer copy_user_meta(gpointer, gpointer); static void release_user_meta(gpointer, gpointer); - static NvOSD_RectParams * allign_postprocess(NvOSD_RectParams &, float*); + static NvOSD_RectParams *allign_postprocess(NvOSD_RectParams &, float *); static float numpy_clip(float, float, float); static 
void add_face_body(int, float);
+    static bool allZeroAVX(const float *, size_t);
+    static bool allZero(const float *, size_t);
 };
\ No newline at end of file
diff --git a/src/nv_osd_manager.cpp b/src/nv_osd_manager.cpp
index ba0d384..fd13065 100644
--- a/src/nv_osd_manager.cpp
+++ b/src/nv_osd_manager.cpp
@@ -30,7 +30,7 @@ bool NvOsdManager::create_nv_osd() {
 }
 
 // Attach probe to a pad in the pipeline
-void NvOsdManager::attach_probe_to_element() {
+void NvOsdManager::attach_probe_to_src_nvosd() {
     GstPad *src_pad = gst_element_get_static_pad(nvosd, "src");
     if (!src_pad) {
         std::cerr << "Unable to get nvosd src pad\n";
@@ -119,3 +119,37 @@ GstPadProbeReturn NvOsdManager::osd_src_pad_buffer_probe(GstPad *pad,
     frame_number++;
     return GST_PAD_PROBE_OK;
 }
+
+/**
+ * @brief Attaches osd_sink_pad_buffer_probe as a buffer probe on the "sink"
+ *        pad of the nvosd element.
+ *
+ * Logs to std::cerr and returns without attaching when the pad cannot be
+ * obtained.
+ */
+void NvOsdManager::attach_probe_to_sink_nvosd() {
+    GstPad *sink_pad = gst_element_get_static_pad(nvosd, "sink");
+    if (!sink_pad) {
+        std::cerr << "Unable to get nvosd sink pad\n";
+        return;
+    }
+
+    // No user data and no destroy-notify callback are registered.
+    gst_pad_add_probe(sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
+                      osd_sink_pad_buffer_probe, NULL, NULL);
+    // Release the pad reference obtained from gst_element_get_static_pad().
+    gst_object_unref(sink_pad);
+}
+
+/**
+ * @brief Buffer probe installed on the nvosd sink pad.
+ *
+ * All three arguments are currently ignored.
+ *
+ * @return GST_PAD_PROBE_OK unconditionally.
+ */
+GstPadProbeReturn NvOsdManager::osd_sink_pad_buffer_probe(GstPad *,
+                                                          GstPadProbeInfo *,
+                                                          gpointer) {
+    return GST_PAD_PROBE_OK;
+}
\ No newline at end of file
diff --git a/src/nv_osd_manager.hpp b/src/nv_osd_manager.hpp
index cca3444..d74af28 100644
--- a/src/nv_osd_manager.hpp
+++ b/src/nv_osd_manager.hpp
@@ -13,8 +13,12 @@ class NvOsdManager {
     bool create_nv_osd();
     ~NvOsdManager();
     static gint frame_number;
-    void attach_probe_to_element();
+    void attach_probe_to_src_nvosd();
     static GstPadProbeReturn osd_src_pad_buffer_probe(GstPad *,
                                                       GstPadProbeInfo *,
                                                       gpointer);
+    void attach_probe_to_sink_nvosd();
+    static GstPadProbeReturn osd_sink_pad_buffer_probe(GstPad *,
+                                                       GstPadProbeInfo *,
+                                                       gpointer);
 };
\ No newline at end of file
diff --git a/src/pipeline_manager.cpp b/src/pipeline_manager.cpp
index 262d048..296e867 100644
---
a/src/pipeline_manager.cpp
+++ b/src/pipeline_manager.cpp
@@ -1,4 +1,5 @@
 #include "pipeline_manager.hpp"
+#define GPU_ID 0
 
 double PipelineManager::fps_buffer_probe = 0;
 double PipelineManager::fps_probe = 0;
@@ -595,6 +596,18 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
     sink_manager->create_sink(prop, rtsp_streaming_manager->host,
                               rtsp_streaming_manager->updsink_port_num);
     sink_manager->create_fake_sink();
+
+    // Create Context for Object Encoding.
+    // Creates and initializes an object encoder context.
+    // This context manages resources such as GPU memory, encoders, and
+    // parameters (resolution, quality, scaling, etc.) needed for encoding
+    // objects into images. Create this once per pipeline.
+    NvDsObjEncCtxHandle obj_ctx_handle = nvds_obj_enc_create_context(GPU_ID);
+    if (!obj_ctx_handle) {
+        g_print("Unable to create context\n");
+        return false;  // bool return type: -1 would convert to true
+    }
+
     nv_infer_server_manager->create_nv_infer_server(num_sources);
 
     // GstElement *nvinfer = gst_bin_get_by_name(GST_BIN(pipeline),
@@ -611,7 +624,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
     face_nv_infer_server_manager->create_face_nv_infer_server(num_sources);
 
     nv_osd_manager
-        ->attach_probe_to_element();  // nvinfer Or use "nvtracker" if after
+        ->attach_probe_to_src_nvosd();  // nvinfer Or use "nvtracker" if after
     message_handling->create_message_handler(pipeline, g_run_forever, loop);
 
     setup_pipeline();
@@ -639,7 +652,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
         face_nv_infer_server_manager->face_detector, "src");
     gst_pad_add_probe(sgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER,
                       face_nv_infer_server_manager->sgie_pad_buffer_probe,
-                      &use_new_mux, NULL);
+                      (gpointer)obj_ctx_handle, NULL);
 
     auto start = std::chrono::system_clock::now();
     status_playing = playing_pipeline(num_sources, url_camera);
@@ -676,6 +689,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
 
     /* Out of the main loop, clean up nicely */
     g_print("Returned, stopping
playback \n"); + nvds_obj_enc_destroy_context(obj_ctx_handle); /* Release the request pads from the tee, and unref them */ gst_element_release_request_pad(tee_manager->tee, tee_manager->tee_msg_pad); gst_element_release_request_pad(tee_manager->tee,