Check embedding zero via simd
This commit is contained in:
parent
9e9b645b95
commit
fbc5a1ff96
@ -15,6 +15,17 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
# Enable all features your current CPU supports (-march=native)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native")
|
||||
# Force AVX2
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mavx2")
|
||||
# Or force AVX-512
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mavx512f")
|
||||
|
||||
|
||||
|
||||
|
||||
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
|
||||
|
||||
# For larger projects
|
||||
@ -149,4 +160,5 @@ target_link_libraries(${PROJECT_NAME} nvdsgst_infer nvds_meta nvds_inferutils
|
||||
nvdsgst_meta nvds_utils nvdsgst_helper
|
||||
prometheus-cpp-core prometheus-cpp-pull # prometheus-cpp-exposer nvdsgst_metnvdsa
|
||||
microhttpd
|
||||
nvdsgst_nvmultiurisrcbin)
|
||||
nvdsgst_nvmultiurisrcbin
|
||||
nvds_batch_jpegenc)
|
||||
@ -65,5 +65,6 @@
|
||||
"redis_broker_host": "localhost",
|
||||
"redis_broker_port": 6379,
|
||||
"topic_redis": "redis_stream"
|
||||
}
|
||||
},
|
||||
"compression_coefficient": 0.125
|
||||
}
|
||||
@ -45,7 +45,7 @@ FaceNvInferServerManager::FaceNvInferServerManager() {
|
||||
// FACE_NET_HEIGHT = config["PGIE_NET_HEIGHT"];
|
||||
MUXER_OUTPUT_WIDTH = config["MUXER_OUTPUT_WIDTH"];
|
||||
MUXER_OUTPUT_HEIGHT = config["MUXER_OUTPUT_HEIGHT"];
|
||||
threshold_face_detection = config["threshold_body_detection"];
|
||||
threshold_face_detection = config["threshold_face_detection"];
|
||||
}
|
||||
|
||||
bool FaceNvInferServerManager::create_face_nv_infer_server(int num_sources) {
|
||||
@ -1148,6 +1148,58 @@ void FaceNvInferServerManager::add_face_body(int object_id, float face_score) {
|
||||
return;
|
||||
}
|
||||
|
||||
// AVX check function
|
||||
/**
 * @brief Reports whether every element of a float array compares equal to 0.
 *
 * Processes 8 floats per iteration with 256-bit AVX instructions when the
 * translation unit is compiled with AVX enabled, then finishes any remaining
 * elements with a scalar loop. Without AVX the scalar loop handles the whole
 * array, so the function still builds on targets that lack the extension.
 *
 * Both +0.0f and -0.0f count as zero. A NaN element counts as non-zero in the
 * vector loop and in the scalar loop alike.
 *
 * @param data Pointer to at least @p size readable floats (no alignment
 *             requirement; unaligned loads are used).
 * @param size Number of floats to inspect.
 * @return true if no element differs from zero (also for size == 0),
 *         false as soon as a non-zero or NaN element is found.
 */
bool FaceNvInferServerManager::allZeroAVX(const float *data, size_t size) {
    size_t i = 0;

#if defined(__AVX__)
    const __m256 zero = _mm256_setzero_ps();  // 8 lanes of 0.0f
    for (; i + 8 <= size; i += 8) {
        const __m256 v = _mm256_loadu_ps(data + i);
        // Unordered "not equal": a NaN lane sets its mask bit, exactly like
        // the scalar `!=` below. The ordered variant would silently treat
        // NaN as zero.
        const __m256 differs = _mm256_cmp_ps(v, zero, _CMP_NEQ_UQ);
        if (_mm256_movemask_ps(differs) != 0) return false;
    }
#endif

    // Remaining elements (or the whole array when AVX is unavailable).
    for (; i < size; ++i) {
        if (data[i] != 0.0f) return false;
    }
    return true;
}
|
||||
|
||||
/**
 * @brief Reports whether every element of a float array compares equal to 0,
 *        using the widest SIMD extension enabled at compile time.
 *
 * Exactly one vector loop is compiled in, selected by the predefined macros
 * __AVX512F__ (16 floats per step), __AVX__ (8 floats per step) or __SSE__
 * (4 floats per step). A scalar loop then handles whatever is left, or the
 * whole array when none of the extensions is enabled.
 *
 * Every path uses the same comparison semantics as scalar `!=`: +0.0f and
 * -0.0f are zero, while NaN is non-zero. This keeps the result independent of
 * which instruction set the binary was built for and of where in the array a
 * NaN happens to sit.
 *
 * @param data Pointer to at least @p size readable floats (no alignment
 *             requirement; unaligned loads are used).
 * @param size Number of floats to inspect.
 * @return true if no element differs from zero (also for size == 0),
 *         false as soon as a non-zero or NaN element is found.
 */
bool FaceNvInferServerManager::allZero(const float *data, size_t size) {
    size_t i = 0;

#if defined(__AVX512F__)
    // 16 floats per iteration.
    const __m512 zero512 = _mm512_setzero_ps();
    for (; i + 16 <= size; i += 16) {
        const __m512 v = _mm512_loadu_ps(data + i);
        // Unordered predicate so NaN lanes are reported as "different".
        const __mmask16 differs = _mm512_cmp_ps_mask(v, zero512, _CMP_NEQ_UQ);
        if (differs != 0) return false;
    }
#elif defined(__AVX__)
    // 8 floats per iteration. Only AVX intrinsics are used here, so plain
    // AVX is sufficient; AVX2 is not required.
    const __m256 zero256 = _mm256_setzero_ps();
    for (; i + 8 <= size; i += 8) {
        const __m256 v = _mm256_loadu_ps(data + i);
        const __m256 differs = _mm256_cmp_ps(v, zero256, _CMP_NEQ_UQ);
        if (_mm256_movemask_ps(differs) != 0) return false;
    }
#elif defined(__SSE__)
    // 4 floats per iteration. _mm_cmpneq_ps is already the unordered form.
    const __m128 zero128 = _mm_setzero_ps();
    for (; i + 4 <= size; i += 4) {
        const __m128 v = _mm_loadu_ps(data + i);
        const __m128 differs = _mm_cmpneq_ps(v, zero128);
        if (_mm_movemask_ps(differs) != 0) return false;
    }
#endif

    // Leftover elements that did not fill a whole vector.
    for (; i < size; ++i) {
        if (data[i] != 0.0f) return false;
    }
    return true;
}
|
||||
|
||||
/* This is the buffer probe function that we have registered on the sink pad
|
||||
* of the tiler element. All SGIE infer elements in the pipeline shall attach
|
||||
* their NvDsInferTensorMeta to each object's metadata of each frame, here we
|
||||
@ -1156,11 +1208,22 @@ void FaceNvInferServerManager::add_face_body(int object_id, float face_score) {
|
||||
* metadata.
|
||||
*/
|
||||
GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
|
||||
GstPad *pad, GstPadProbeInfo *info, gpointer u_data) {
|
||||
GstPad *pad, GstPadProbeInfo *info, gpointer ctx) {
|
||||
(void)ctx;
|
||||
GstBuffer *buf = (GstBuffer *)info->data;
|
||||
GstMapInfo inmap = GST_MAP_INFO_INIT;
|
||||
if (!gst_buffer_map(buf, &inmap, GST_MAP_READ)) {
|
||||
GST_ERROR("input buffer mapinfo failed");
|
||||
return GST_PAD_PROBE_DROP;
|
||||
}
|
||||
NvBufSurface *ip_surf = (NvBufSurface *)inmap.data;
|
||||
gst_buffer_unmap(buf, &inmap);
|
||||
(void)ip_surf;
|
||||
|
||||
(void)pad;
|
||||
// static guint use_device_mem = 0;
|
||||
gboolean *use_new_mux = (gboolean *)u_data;
|
||||
(void)use_new_mux;
|
||||
// gboolean *use_new_mux = (gboolean *)u_data;
|
||||
// (void)use_new_mux;
|
||||
|
||||
static NvDsInferNetworkInfo networkInfo{FACE_NET_WIDTH, FACE_NET_HEIGHT, 3};
|
||||
// (void)networkInfo;
|
||||
@ -1168,6 +1231,7 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
|
||||
NvDsBatchMeta *batch_meta =
|
||||
gst_buffer_get_nvds_batch_meta(GST_BUFFER(info->data));
|
||||
if (!batch_meta) return GST_PAD_PROBE_OK;
|
||||
bool is_zero_embedding_vector;
|
||||
|
||||
/* Iterate each frame metadata in batch */
|
||||
for (NvDsMetaList *l_frame = batch_meta->frame_meta_list; l_frame != NULL;
|
||||
@ -1280,8 +1344,7 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
|
||||
|
||||
unsigned int numDims = layer.inferDims.numDims;
|
||||
unsigned int numElements = layer.inferDims.numElements;
|
||||
// (void)numElements;
|
||||
// (void)numDims;
|
||||
(void)numElements;
|
||||
|
||||
// std::cout << "Layer " << jkl << " (" << layer.layerName
|
||||
// << "):\n";
|
||||
@ -1298,11 +1361,12 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
|
||||
|
||||
float *data_face = static_cast<float *>(layer.buffer);
|
||||
if (!(strcmp(layer.layerName, "embedding") == 0)) {
|
||||
for (unsigned int xyz = 0; xyz < numElements; xyz++) {
|
||||
// std::cout << "data_face[" << xyz
|
||||
// << "]= " << data_face[xyz] <<
|
||||
// std::endl;
|
||||
}
|
||||
// for (unsigned int xyz = 0; xyz < numElements; xyz++)
|
||||
// {
|
||||
// std::cout << "data_face[" << xyz
|
||||
// << "]= " << data_face[xyz] <<
|
||||
// std::endl;
|
||||
// }
|
||||
if ((strcmp(layer.layerName, "bbox") == 0)) {
|
||||
for (int l = 0; l < 4; l++) {
|
||||
face_location[l] = data_face[l];
|
||||
@ -1313,21 +1377,27 @@ GstPadProbeReturn FaceNvInferServerManager::sgie_pad_buffer_probe(
|
||||
}
|
||||
if ((strcmp(layer.layerName, "score") == 0)) {
|
||||
score_face = data_face[0];
|
||||
// std::cout << "score_face= " << score_face <<
|
||||
// std::endl; if (score_face>0.9){
|
||||
// std::cout << "score_face= " << score_face
|
||||
// <<std::endl; if (score_face>0.9){
|
||||
// high_confidence_faces++;
|
||||
// std::cout << "high_confidence_faces= " <<
|
||||
// high_confidence_faces << std::endl;
|
||||
// std::quick_exit(0);
|
||||
// }
|
||||
}
|
||||
} else {
|
||||
is_zero_embedding_vector = allZero(data_face, 512);
|
||||
// std::cout<<"is_zero_embedding_vector =
|
||||
// "<<is_zero_embedding_vector<<std::endl; for (int l =
|
||||
// 0; l < 512; l++) {
|
||||
// std::cout << "face_location[" << l
|
||||
// << "]= " << data_face[l] << std::endl;
|
||||
// }
|
||||
}
|
||||
// else{
|
||||
// for (int l = 0; l < 512; l++) {
|
||||
// std::cout << "face_location[" << l
|
||||
// << "]= " << data_face[l] << std::endl;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
// std::quick_exit(0);
|
||||
if (is_zero_embedding_vector == 0) {
|
||||
;
|
||||
}
|
||||
|
||||
if (score_face > threshold_face_detection) {
|
||||
|
||||
@ -7,11 +7,12 @@
|
||||
#include "gstnvdsmeta.h"
|
||||
#include "nvds_version.h"
|
||||
// #include "nvdsinfer_custom_impl.h"
|
||||
#include <immintrin.h> // for AVX intrinsics
|
||||
|
||||
#include "config_manager.hpp"
|
||||
#include "custom_gstnvdsinfer.hpp"
|
||||
#include "nvdsmeta.h"
|
||||
#include "nvdsmeta_schema.h"
|
||||
#include "custom_gstnvdsinfer.hpp"
|
||||
#include "config_manager.hpp"
|
||||
|
||||
|
||||
class FaceNvInferServerManager {
|
||||
private:
|
||||
@ -51,8 +52,7 @@ class FaceNvInferServerManager {
|
||||
// static gpointer copy_user_meta(gpointer, gpointer);
|
||||
// static void release_user_meta(gpointer, gpointer);
|
||||
|
||||
static GstPadProbeReturn sgie_pad_buffer_probe(GstPad *, GstPadProbeInfo
|
||||
*,
|
||||
static GstPadProbeReturn sgie_pad_buffer_probe(GstPad *, GstPadProbeInfo *,
|
||||
gpointer);
|
||||
// static GstPadProbeReturn osd_sink_pad_buffer_probe_new(GstPad *,
|
||||
// GstPadProbeInfo
|
||||
@ -60,7 +60,9 @@ class FaceNvInferServerManager {
|
||||
static void *set_metadata_ptr(float *);
|
||||
static gpointer copy_user_meta(gpointer, gpointer);
|
||||
static void release_user_meta(gpointer, gpointer);
|
||||
static NvOSD_RectParams * allign_postprocess(NvOSD_RectParams &, float*);
|
||||
static NvOSD_RectParams *allign_postprocess(NvOSD_RectParams &, float *);
|
||||
static float numpy_clip(float, float, float);
|
||||
static void add_face_body(int, float);
|
||||
static bool allZeroAVX(const float *, size_t);
|
||||
static bool allZero(const float *, size_t);
|
||||
};
|
||||
@ -30,7 +30,7 @@ bool NvOsdManager::create_nv_osd() {
|
||||
}
|
||||
|
||||
// Attach probe to a pad in the pipeline
|
||||
void NvOsdManager::attach_probe_to_element() {
|
||||
void NvOsdManager::attach_probe_to_src_nvosd() {
|
||||
GstPad *src_pad = gst_element_get_static_pad(nvosd, "src");
|
||||
if (!src_pad) {
|
||||
std::cerr << "Unable to get nvosd src pad\n";
|
||||
@ -119,3 +119,22 @@ GstPadProbeReturn NvOsdManager::osd_src_pad_buffer_probe(GstPad *pad,
|
||||
frame_number++;
|
||||
return GST_PAD_PROBE_OK;
|
||||
}
|
||||
|
||||
// Attach probe to a pad in the pipeline
|
||||
void NvOsdManager::attach_probe_to_sink_nvosd() {
|
||||
GstPad *sink_pad = gst_element_get_static_pad(nvosd, "sink");
|
||||
if (!sink_pad) {
|
||||
std::cerr << "Unable to get nvosd src pad\n";
|
||||
return;
|
||||
}
|
||||
|
||||
gst_pad_add_probe(sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
|
||||
osd_sink_pad_buffer_probe, NULL, NULL);
|
||||
gst_object_unref(sink_pad);
|
||||
}
|
||||
|
||||
/**
 * @brief Buffer probe callback registered on the nvosd sink pad.
 *
 * Currently a placeholder: none of the three arguments is inspected and
 * nothing is modified.
 *
 * @return Always GST_PAD_PROBE_OK.
 */
GstPadProbeReturn NvOsdManager::osd_sink_pad_buffer_probe(
    GstPad * /*pad*/, GstPadProbeInfo * /*info*/, gpointer /*user_data*/) {
    const GstPadProbeReturn verdict = GST_PAD_PROBE_OK;
    return verdict;
}
|
||||
@ -13,8 +13,12 @@ class NvOsdManager {
|
||||
bool create_nv_osd();
|
||||
~NvOsdManager();
|
||||
static gint frame_number;
|
||||
void attach_probe_to_element();
|
||||
void attach_probe_to_src_nvosd();
|
||||
static GstPadProbeReturn osd_src_pad_buffer_probe(GstPad *,
|
||||
GstPadProbeInfo *,
|
||||
gpointer);
|
||||
void attach_probe_to_sink_nvosd();
|
||||
static GstPadProbeReturn osd_sink_pad_buffer_probe(GstPad *,
|
||||
GstPadProbeInfo *,
|
||||
gpointer);
|
||||
};
|
||||
@ -1,4 +1,5 @@
|
||||
#include "pipeline_manager.hpp"
|
||||
#define GPU_ID 0
|
||||
|
||||
double PipelineManager::fps_buffer_probe = 0;
|
||||
double PipelineManager::fps_probe = 0;
|
||||
@ -595,6 +596,18 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
|
||||
sink_manager->create_sink(prop, rtsp_streaming_manager->host,
|
||||
rtsp_streaming_manager->updsink_port_num);
|
||||
sink_manager->create_fake_sink();
|
||||
|
||||
// Create Context for Object Encoding.
|
||||
// Creates and initializes an object encoder context.
|
||||
// This context manages resources such as GPU memory, encoders, and
|
||||
// parameters (resolution, quality, scaling, etc.) needed for encoding
|
||||
// objects into images. create this once per pipeline.
|
||||
NvDsObjEncCtxHandle obj_ctx_handle = nvds_obj_enc_create_context(GPU_ID);
|
||||
if (!obj_ctx_handle) {
|
||||
g_print("Unable to create context\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
nv_infer_server_manager->create_nv_infer_server(num_sources);
|
||||
|
||||
// GstElement *nvinfer = gst_bin_get_by_name(GST_BIN(pipeline),
|
||||
@ -611,7 +624,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
|
||||
face_nv_infer_server_manager->create_face_nv_infer_server(num_sources);
|
||||
|
||||
nv_osd_manager
|
||||
->attach_probe_to_element(); // nvinfer Or use "nvtracker" if after
|
||||
->attach_probe_to_src_nvosd(); // nvinfer Or use "nvtracker" if after
|
||||
|
||||
message_handling->create_message_handler(pipeline, g_run_forever, loop);
|
||||
setup_pipeline();
|
||||
@ -639,7 +652,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
|
||||
face_nv_infer_server_manager->face_detector, "src");
|
||||
gst_pad_add_probe(sgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER,
|
||||
face_nv_infer_server_manager->sgie_pad_buffer_probe,
|
||||
&use_new_mux, NULL);
|
||||
(gpointer)obj_ctx_handle, NULL);
|
||||
|
||||
auto start = std::chrono::system_clock::now();
|
||||
status_playing = playing_pipeline(num_sources, url_camera);
|
||||
@ -676,6 +689,7 @@ bool PipelineManager::create_pipeline_elements(int num_sources,
|
||||
/* Out of the main loop, clean up nicely */
|
||||
g_print("Returned, stopping playback \n");
|
||||
|
||||
nvds_obj_enc_destroy_context(obj_ctx_handle);
|
||||
/* Release the request pads from the tee, and unref them */
|
||||
gst_element_release_request_pad(tee_manager->tee, tee_manager->tee_msg_pad);
|
||||
gst_element_release_request_pad(tee_manager->tee,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user