Check embedding zero via simd

This commit is contained in:
Barzan Hayati 2025-09-10 20:27:58 +00:00
parent 9e9b645b95
commit fbc5a1ff96
7 changed files with 153 additions and 31 deletions

View File

@ -15,6 +15,17 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
# Enable all features your current CPU supports
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native")
# Force AVX2
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mavx2")
# Or force AVX-512
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mavx512f")
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
# For larger projects # For larger projects
@ -149,4 +160,5 @@ target_link_libraries(${PROJECT_NAME} nvdsgst_infer nvds_meta nvds_inferutils
nvdsgst_meta nvds_utils nvdsgst_helper nvdsgst_meta nvds_utils nvdsgst_helper
prometheus-cpp-core prometheus-cpp-pull # prometheus-cpp-exposer nvdsgst_metnvdsa prometheus-cpp-core prometheus-cpp-pull # prometheus-cpp-exposer nvdsgst_metnvdsa
microhttpd microhttpd
nvdsgst_nvmultiurisrcbin) nvdsgst_nvmultiurisrcbin
nvds_batch_jpegenc)

View File

@ -65,5 +65,6 @@
"redis_broker_host": "localhost", "redis_broker_host": "localhost",
"redis_broker_port": 6379, "redis_broker_port": 6379,
"topic_redis": "redis_stream" "topic_redis": "redis_stream"
} },
"compression_coefficient": 0.125
} }

View File

@ -45,7 +45,7 @@ FaceNvInferServerManager::FaceNvInferServerManager() {
// FACE_NET_HEIGHT = config["PGIE_NET_HEIGHT"]; // FACE_NET_HEIGHT = config["PGIE_NET_HEIGHT"];
MUXER_OUTPUT_WIDTH = config["MUXER_OUTPUT_WIDTH"]; MUXER_OUTPUT_WIDTH = config["MUXER_OUTPUT_WIDTH"];
MUXER_OUTPUT_HEIGHT = config["MUXER_OUTPUT_HEIGHT"]; MUXER_OUTPUT_HEIGHT = config["MUXER_OUTPUT_HEIGHT"];
threshold_face_detection = config["threshold_body_detection"]; threshold_face_detection = config["threshold_face_detection"];
} }
bool FaceNvInferServerManager::create_face_nv_infer_server(int num_sources) { bool FaceNvInferServerManager::create_face_nv_infer_server(int num_sources) {
@ -1148,6 +1148,58 @@ void FaceNvInferServerManager::add_face_body(int object_id, float face_score) {
return; return;
} }
// AVX check: reports whether every element of a float buffer compares equal
// to zero.
//
// The buffer is scanned eight floats at a time with unaligned 256-bit loads;
// the remaining (size % 8) elements are checked by a scalar loop.
//
// The vector comparison uses the *unordered* predicate _CMP_NEQ_UQ so that a
// NaN lane is reported as "not zero", exactly like the scalar `!=` applied to
// the tail. With the ordered predicate (_CMP_NEQ_OQ) a NaN inside a full
// 8-float group was silently treated as zero, while the same NaN in the tail
// was not, so the answer depended on where the NaN happened to sit.
//
// data: pointer to the first element; must be readable for `size` floats.
// size: number of floats to inspect.
// Returns true when no element differs from 0.0f (including size == 0),
// false as soon as a non-zero or NaN element is found.
bool FaceNvInferServerManager::allZeroAVX(const float *data, size_t size) {
    size_t i = 0;
    const __m256 zero = _mm256_setzero_ps();  // 8 floats of 0.0
    for (; i + 8 <= size; i += 8) {
        const __m256 v = _mm256_loadu_ps(&data[i]);  // load 8 floats
        // Lane is all-ones when v != 0 or v is NaN.
        const __m256 cmp = _mm256_cmp_ps(v, zero, _CMP_NEQ_UQ);
        if (_mm256_movemask_ps(cmp) != 0) return false;  // any lane set
    }
    // leftover elements
    for (; i < size; ++i) {
        if (data[i] != 0.0f) return false;
    }
    return true;
}
// Reports whether every element of a float buffer compares equal to zero,
// using the widest SIMD instruction set enabled at compile time (AVX-512F,
// then AVX2, then SSE) and a scalar loop for whatever is left over.
//
// All code paths use "unordered not-equal" semantics, i.e. a NaN element
// counts as non-zero:
//   - AVX-512 / AVX2: _CMP_NEQ_UQ (the ordered _CMP_NEQ_OQ would treat a NaN
//     lane as equal to zero and disagree with the other paths),
//   - SSE: _mm_cmpneq_ps,
//   - scalar tail: the `!=` operator.
// This keeps the result independent of the build flags and of the position
// of a NaN inside the buffer.
//
// data: pointer to the first element; must be readable for `size` floats.
//       Loads are unaligned, so no particular alignment is required.
// size: number of floats to inspect.
// Returns true when no element differs from 0.0f (including size == 0).
bool FaceNvInferServerManager::allZero(const float *data, size_t size) {
    size_t i = 0;
#if defined(__AVX512F__)
    // 16 floats per iteration
    const __m512 zero512 = _mm512_setzero_ps();
    for (; i + 16 <= size; i += 16) {
        const __m512 v = _mm512_loadu_ps(&data[i]);
        const __mmask16 cmp = _mm512_cmp_ps_mask(v, zero512, _CMP_NEQ_UQ);
        if (cmp != 0) return false;
    }
#elif defined(__AVX2__)
    // 8 floats per iteration
    const __m256 zero256 = _mm256_setzero_ps();
    for (; i + 8 <= size; i += 8) {
        const __m256 v = _mm256_loadu_ps(&data[i]);
        const __m256 cmp = _mm256_cmp_ps(v, zero256, _CMP_NEQ_UQ);
        if (_mm256_movemask_ps(cmp) != 0) return false;
    }
#elif defined(__SSE__)
    // 4 floats per iteration
    const __m128 zero128 = _mm_setzero_ps();
    for (; i + 4 <= size; i += 4) {
        const __m128 v = _mm_loadu_ps(&data[i]);
        const __m128 cmp = _mm_cmpneq_ps(v, zero128);
        if (_mm_movemask_ps(cmp) != 0) return false;
    }
#endif
    // leftover elements (or the whole buffer when no SIMD path is compiled in)
    for (; i < size; ++i) {
        if (data[i] != 0.0f) return false;
    }
    return true;
}
/* This is the buffer probe function that we have registered on the sink pad /* This is the buffer probe function that we have registered on the sink pad
* of the tiler element. All SGIE infer elements in the pipeline shall attach * of the tiler element. All SGIE infer elements in the pipeline shall attach
* their NvDsInferTensorMeta to each object's metadata of each frame, here we * their NvDsInferTensorMeta to each object's metadata of each frame, here we
@ -1156,11 +1208,22 @@ void FaceNvInferServerManager::add_face_body(int object_id, float face_score) {
* metadata. * metadata.
*/ */
GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe( GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
GstPad *pad, GstPadProbeInfo *info, gpointer u_data) { GstPad *pad, GstPadProbeInfo *info, gpointer ctx) {
(void)ctx;
GstBuffer *buf = (GstBuffer *)info->data;
GstMapInfo inmap = GST_MAP_INFO_INIT;
if (!gst_buffer_map(buf, &inmap, GST_MAP_READ)) {
GST_ERROR("input buffer mapinfo failed");
return GST_PAD_PROBE_DROP;
}
NvBufSurface *ip_surf = (NvBufSurface *)inmap.data;
gst_buffer_unmap(buf, &inmap);
(void)ip_surf;
(void)pad; (void)pad;
// static guint use_device_mem = 0; // static guint use_device_mem = 0;
gboolean *use_new_mux = (gboolean *)u_data; // gboolean *use_new_mux = (gboolean *)u_data;
(void)use_new_mux; // (void)use_new_mux;
static NvDsInferNetworkInfo networkInfo{FACE_NET_WIDTH, FACE_NET_HEIGHT, 3}; static NvDsInferNetworkInfo networkInfo{FACE_NET_WIDTH, FACE_NET_HEIGHT, 3};
// (void)networkInfo; // (void)networkInfo;
@ -1168,6 +1231,7 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
NvDsBatchMeta *batch_meta = NvDsBatchMeta *batch_meta =
gst_buffer_get_nvds_batch_meta(GST_BUFFER(info->data)); gst_buffer_get_nvds_batch_meta(GST_BUFFER(info->data));
if (!batch_meta) return GST_PAD_PROBE_OK; if (!batch_meta) return GST_PAD_PROBE_OK;
bool is_zero_embedding_vector;
/* Iterate each frame metadata in batch */ /* Iterate each frame metadata in batch */
for (NvDsMetaList *l_frame = batch_meta->frame_meta_list; l_frame != NULL; for (NvDsMetaList *l_frame = batch_meta->frame_meta_list; l_frame != NULL;
@ -1280,8 +1344,7 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
unsigned int numDims = layer.inferDims.numDims; unsigned int numDims = layer.inferDims.numDims;
unsigned int numElements = layer.inferDims.numElements; unsigned int numElements = layer.inferDims.numElements;
// (void)numElements; (void)numElements;
// (void)numDims;
// std::cout << "Layer " << jkl << " (" << layer.layerName // std::cout << "Layer " << jkl << " (" << layer.layerName
// << "):\n"; // << "):\n";
@ -1298,11 +1361,12 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
float *data_face = static_cast<float *>(layer.buffer); float *data_face = static_cast<float *>(layer.buffer);
if (!(strcmp(layer.layerName, "embedding") == 0)) { if (!(strcmp(layer.layerName, "embedding") == 0)) {
for (unsigned int xyz = 0; xyz < numElements; xyz++) { // for (unsigned int xyz = 0; xyz < numElements; xyz++)
// std::cout << "data_face[" << xyz // {
// << "]= " << data_face[xyz] << // std::cout << "data_face[" << xyz
// std::endl; // << "]= " << data_face[xyz] <<
} // std::endl;
// }
if ((strcmp(layer.layerName, "bbox") == 0)) { if ((strcmp(layer.layerName, "bbox") == 0)) {
for (int l = 0; l < 4; l++) { for (int l = 0; l < 4; l++) {
face_location[l] = data_face[l]; face_location[l] = data_face[l];
@ -1313,21 +1377,27 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
} }
if ((strcmp(layer.layerName, "score") == 0)) { if ((strcmp(layer.layerName, "score") == 0)) {
score_face = data_face[0]; score_face = data_face[0];
// std::cout << "score_face= " << score_face << // std::cout << "score_face= " << score_face
// std::endl; if (score_face>0.9){ // <<std::endl; if (score_face>0.9){
// high_confidence_faces++; // high_confidence_faces++;
// std::cout << "high_confidence_faces= " << // std::cout << "high_confidence_faces= " <<
// high_confidence_faces << std::endl; // high_confidence_faces << std::endl;
// std::quick_exit(0); // std::quick_exit(0);
// } // }
} }
} else {
is_zero_embedding_vector = allZero(data_face, 512);
// std::cout<<"is_zero_embedding_vector =
// "<<is_zero_embedding_vector<<std::endl; for (int l =
// 0; l < 512; l++) {
// std::cout << "face_location[" << l
// << "]= " << data_face[l] << std::endl;
// }
} }
// else{ }
// for (int l = 0; l < 512; l++) { // std::quick_exit(0);
// std::cout << "face_location[" << l if (is_zero_embedding_vector == 0) {
// << "]= " << data_face[l] << std::endl; ;
// }
// }
} }
if (score_face > threshold_face_detection) { if (score_face > threshold_face_detection) {

View File

@ -7,11 +7,12 @@
#include "gstnvdsmeta.h" #include "gstnvdsmeta.h"
#include "nvds_version.h" #include "nvds_version.h"
// #include "nvdsinfer_custom_impl.h" // #include "nvdsinfer_custom_impl.h"
#include <immintrin.h> // for AVX intrinsics
#include "config_manager.hpp"
#include "custom_gstnvdsinfer.hpp"
#include "nvdsmeta.h" #include "nvdsmeta.h"
#include "nvdsmeta_schema.h" #include "nvdsmeta_schema.h"
#include "custom_gstnvdsinfer.hpp"
#include "config_manager.hpp"
class FaceNvInferServerManager { class FaceNvInferServerManager {
private: private:
@ -51,8 +52,7 @@ class FaceNvInferServerManager {
// static gpointer copy_user_meta(gpointer, gpointer); // static gpointer copy_user_meta(gpointer, gpointer);
// static void release_user_meta(gpointer, gpointer); // static void release_user_meta(gpointer, gpointer);
static GstPadProbeReturn sgie_pad_buffer_probe(GstPad *, GstPadProbeInfo static GstPadProbeReturn sgie_pad_buffer_probe(GstPad *, GstPadProbeInfo *,
*,
gpointer); gpointer);
// static GstPadProbeReturn osd_sink_pad_buffer_probe_new(GstPad *, // static GstPadProbeReturn osd_sink_pad_buffer_probe_new(GstPad *,
// GstPadProbeInfo // GstPadProbeInfo
@ -60,7 +60,9 @@ class FaceNvInferServerManager {
static void *set_metadata_ptr(float *); static void *set_metadata_ptr(float *);
static gpointer copy_user_meta(gpointer, gpointer); static gpointer copy_user_meta(gpointer, gpointer);
static void release_user_meta(gpointer, gpointer); static void release_user_meta(gpointer, gpointer);
static NvOSD_RectParams * allign_postprocess(NvOSD_RectParams &, float*); static NvOSD_RectParams *allign_postprocess(NvOSD_RectParams &, float *);
static float numpy_clip(float, float, float); static float numpy_clip(float, float, float);
static void add_face_body(int, float); static void add_face_body(int, float);
static bool allZeroAVX(const float *, size_t);
static bool allZero(const float *, size_t);
}; };

View File

@ -30,7 +30,7 @@ bool NvOsdManager::create_nv_osd() {
} }
// Attach probe to a pad in the pipeline // Attach probe to a pad in the pipeline
void NvOsdManager::attach_probe_to_element() { void NvOsdManager::attach_probe_to_src_nvosd() {
GstPad *src_pad = gst_element_get_static_pad(nvosd, "src"); GstPad *src_pad = gst_element_get_static_pad(nvosd, "src");
if (!src_pad) { if (!src_pad) {
std::cerr << "Unable to get nvosd src pad\n"; std::cerr << "Unable to get nvosd src pad\n";
@ -119,3 +119,22 @@ GstPadProbeReturn NvOsdManager::osd_src_pad_buffer_probe(GstPad *pad,
frame_number++; frame_number++;
return GST_PAD_PROBE_OK; return GST_PAD_PROBE_OK;
} }
// Attach a buffer probe to the "sink" pad of the nvosd element.
//
// Looks up the static sink pad of `nvosd`, registers
// osd_sink_pad_buffer_probe on it for GST_PAD_PROBE_TYPE_BUFFER with no user
// data and no destroy-notify, then drops the pad reference obtained from the
// lookup. If the pad cannot be obtained, an error is printed to stderr and
// no probe is installed.
void NvOsdManager::attach_probe_to_sink_nvosd() {
    GstPad *sink_pad = gst_element_get_static_pad(nvosd, "sink");
    if (!sink_pad) {
        // Message previously said "src pad" (copied from the src-pad variant).
        std::cerr << "Unable to get nvosd sink pad\n";
        return;
    }
    gst_pad_add_probe(sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
                      osd_sink_pad_buffer_probe, NULL, NULL);
    // Release the reference returned by gst_element_get_static_pad.
    gst_object_unref(sink_pad);
}
// Buffer probe callback for the nvosd sink pad.
// Currently a pass-through: none of the arguments are inspected and
// GST_PAD_PROBE_OK is always returned.
GstPadProbeReturn NvOsdManager::osd_sink_pad_buffer_probe(
    GstPad * /*pad*/, GstPadProbeInfo * /*info*/, gpointer /*u_data*/) {
    const GstPadProbeReturn verdict = GST_PAD_PROBE_OK;
    return verdict;
}

View File

@ -13,8 +13,12 @@ class NvOsdManager {
bool create_nv_osd(); bool create_nv_osd();
~NvOsdManager(); ~NvOsdManager();
static gint frame_number; static gint frame_number;
void attach_probe_to_element(); void attach_probe_to_src_nvosd();
static GstPadProbeReturn osd_src_pad_buffer_probe(GstPad *, static GstPadProbeReturn osd_src_pad_buffer_probe(GstPad *,
GstPadProbeInfo *, GstPadProbeInfo *,
gpointer); gpointer);
void attach_probe_to_sink_nvosd();
static GstPadProbeReturn osd_sink_pad_buffer_probe(GstPad *,
GstPadProbeInfo *,
gpointer);
}; };

View File

@ -1,4 +1,5 @@
#include "pipeline_manager.hpp" #include "pipeline_manager.hpp"
#define GPU_ID 0
double PipelineManager::fps_buffer_probe = 0; double PipelineManager::fps_buffer_probe = 0;
double PipelineManager::fps_probe = 0; double PipelineManager::fps_probe = 0;
@ -595,6 +596,18 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
sink_manager->create_sink(prop, rtsp_streaming_manager->host, sink_manager->create_sink(prop, rtsp_streaming_manager->host,
rtsp_streaming_manager->updsink_port_num); rtsp_streaming_manager->updsink_port_num);
sink_manager->create_fake_sink(); sink_manager->create_fake_sink();
// Create Context for Object Encoding.
// Creates and initializes an object encoder context.
// This context manages resources such as GPU memory, encoders, and
// parameters (resolution, quality, scaling, etc.) needed for encoding
// objects into images. create this once per pipeline.
NvDsObjEncCtxHandle obj_ctx_handle = nvds_obj_enc_create_context(GPU_ID);
if (!obj_ctx_handle) {
g_print("Unable to create context\n");
return -1;
}
nv_infer_server_manager->create_nv_infer_server(num_sources); nv_infer_server_manager->create_nv_infer_server(num_sources);
// GstElement *nvinfer = gst_bin_get_by_name(GST_BIN(pipeline), // GstElement *nvinfer = gst_bin_get_by_name(GST_BIN(pipeline),
@ -611,7 +624,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
face_nv_infer_server_manager->create_face_nv_infer_server(num_sources); face_nv_infer_server_manager->create_face_nv_infer_server(num_sources);
nv_osd_manager nv_osd_manager
->attach_probe_to_element(); // nvinfer Or use "nvtracker" if after ->attach_probe_to_src_nvosd(); // nvinfer Or use "nvtracker" if after
message_handling->create_message_handler(pipeline, g_run_forever, loop); message_handling->create_message_handler(pipeline, g_run_forever, loop);
setup_pipeline(); setup_pipeline();
@ -639,7 +652,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
face_nv_infer_server_manager->face_detector, "src"); face_nv_infer_server_manager->face_detector, "src");
gst_pad_add_probe(sgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER, gst_pad_add_probe(sgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER,
face_nv_infer_server_manager->sgie_pad_buffer_probe, face_nv_infer_server_manager->sgie_pad_buffer_probe,
&use_new_mux, NULL); (gpointer)obj_ctx_handle, NULL);
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
status_playing = playing_pipeline(num_sources, url_camera); status_playing = playing_pipeline(num_sources, url_camera);
@ -676,6 +689,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
/* Out of the main loop, clean up nicely */ /* Out of the main loop, clean up nicely */
g_print("Returned, stopping playback \n"); g_print("Returned, stopping playback \n");
nvds_obj_enc_destroy_context(obj_ctx_handle);
/* Release the request pads from the tee, and unref them */ /* Release the request pads from the tee, and unref them */
gst_element_release_request_pad(tee_manager->tee, tee_manager->tee_msg_pad); gst_element_release_request_pad(tee_manager->tee, tee_manager->tee_msg_pad);
gst_element_release_request_pad(tee_manager->tee, gst_element_release_request_pad(tee_manager->tee,