diff --git a/src/nv_infer_server_manager.cpp b/src/nv_infer_server_manager.cpp
index 3b3c0da..9ebafbb 100644
--- a/src/nv_infer_server_manager.cpp
+++ b/src/nv_infer_server_manager.cpp
@@ -231,9 +231,6 @@ GstPadProbeReturn NvInferServerManager::pgie_pad_buffer_probe(
        l_frame = l_frame->next) {
     NvDsFrameMeta *frame_meta = (NvDsFrameMeta *)l_frame->data;
 
-    // std::cout << "frame number = " << frame_meta->frame_num
-    //           << " frame id = " << frame_meta->source_id << std::endl;
-
     // to solve track not showing up issue
     nvds_acquire_meta_lock(batch_meta);
     frame_meta->bInferDone = TRUE;
@@ -241,10 +238,45 @@ GstPadProbeReturn NvInferServerManager::pgie_pad_buffer_probe(
     if (*use_new_mux) {
       stream_width = frame_meta->source_frame_width;
       stream_height = frame_meta->source_frame_height;
+      // These values come from the actual source (decoder) frame size
+      // before batching/muxing. They tell us the native resolution of the
+      // incoming camera/RTSP/file. If have multiple sources with
+      // different resolutions, these values can differ per source/frame.
+      // Use this if need the original stream resolution (e.g., for
+      // scaling bounding boxes back to source coordinates).
     } else {
       stream_width = MUXER_OUTPUT_WIDTH;
       stream_height = MUXER_OUTPUT_HEIGHT;
+      // These are the dimensions configured in nvstreammux (width /
+      // height). All sources fed into the muxer get scaled/padded to this
+      // resolution before being passed to downstream elements (like
+      // PGIE). So PGIE always “sees” frames at muxer resolution, not the
+      // raw input resolution. Use this if need the effective frame size
+      // that PGIE is processing (i.e., what TensorRT sees).
     }
+    // Inside PGIE (nvinfer), the correct dimensions are the muxer output
+    // width/height, because frames are resized by nvstreammux before
+    // inference. If want the original camera’s resolution, use
+    // frame_meta->source_frame_width / source_frame_height.
+    // nvmultiurisrcbin internally creates a nvstreammux before sending
+    // buffers downstream. That means by the time PGIE sees frames, they are
+    // already scaled to the muxer’s output size.
+
+    // At PGIE input, the frame resolution is the muxer’s configured output
+    // size. Therefore the correct dimensions for PGIE are: stream_width =
+    // MUXER_OUTPUT_WIDTH; stream_height = MUXER_OUTPUT_HEIGHT; Why not
+    // frame_meta->source_frame_width? Those fields still exist in
+    // frame_meta, but they represent the original source stream resolution
+    // (camera/file). Since PGIE never directly sees that resolution (it
+    // only sees muxed frames), using these values inside PGIE would be
+    // misleading.
+
+    // For this pipeline, use MUXER_OUTPUT_WIDTH and MUXER_OUTPUT_HEIGHT to
+    // represent what PGIE actually processes. If later need to map
+    // detections back to the original stream resolution (e.g., for saving
+    // cropped images or re-streaming), then use
+    // frame_meta->source_frame_width and source_frame_height for scaling.
+
     (void)stream_height;
     (void)stream_width;
     uint detected_persons = 0;
@@ -429,6 +461,8 @@ void NvInferServerManager::update_frame_with_face_body_meta(
         ((left_down_shoulder.y - data[index * BODY_TENSOR_SIZE + 1]) *
          MUXER_OUTPUT_HEIGHT / PGIE_NET_HEIGHT);
 
+    clamp_rect_params(frame_meta, &rect_params_imprecise_face, "FACE");
+
     /* Border of width 3. */
     rect_params_imprecise_face.border_width = 3;
     rect_params_imprecise_face.has_bg_color = 0;
@@ -488,6 +522,8 @@ void NvInferServerManager::update_frame_with_face_body_meta(
                          data[index * BODY_TENSOR_SIZE + 1]) *
                             MUXER_OUTPUT_HEIGHT / PGIE_NET_HEIGHT);
+    clamp_rect_params(frame_meta, &rect_params_body, "BODY");
+
     /* Border of width 3.
      */
     rect_params_body.border_width = 3;
     rect_params_body.has_bg_color = 0;
@@ -498,8 +534,11 @@ void NvInferServerManager::update_frame_with_face_body_meta(
     // text_params.display_text = g_strdup_printf("ImpreciseFace %lu",
     //     face_obj->object_id);
     /* Display text above the left top corner of the object. */
-    text_params_body.x_offset = rect_params_body.left - 30;
-    text_params_body.y_offset = rect_params_body.top - 30;
+    text_params_body.x_offset =
+        (rect_params_body.left - 30 < 0) ? 0 : rect_params_body.left - 30;
+    text_params_body.y_offset =
+        (rect_params_body.top - 30 < 0) ? 0 : rect_params_body.top - 30;
+
     /* Set black background for the text. */
     text_params_body.set_bg_clr = 1;
     text_params_body.text_bg_clr = NvOSD_ColorParams{0, 0, 0, 1};
@@ -522,9 +561,64 @@ void NvInferServerManager::update_frame_with_face_body_meta(
   }
 }
 
+// Clamp a rectangle into the frame bounds and sanitize NaN/inf/degenerate
+// boxes so invalid values never reach the OSD.
+void NvInferServerManager::clamp_rect_params(NvDsFrameMeta *frame_meta,
+                                             NvOSD_RectParams *rect_params,
+                                             std::string type) {
+  (void)type;
+  // NOTE(review): clamp bounds use the source frame size, but the rects
+  // above are scaled with MUXER_OUTPUT_* — confirm the intended space.
+  guint frame_width = frame_meta->source_frame_width;
+  guint frame_height = frame_meta->source_frame_height;
+
+  // read values (DeepStream stores rect params as floats)
+  float left = rect_params->left;
+  float top = rect_params->top;
+  float width = rect_params->width;
+  float height = rect_params->height;
+  float right = left + width;
+  float bottom = top + height;
+
+  // CHECK for invalid numbers (NaN/inf) or out-of-bounds
+  bool invalid = false;
+  if (!std::isfinite(left) || !std::isfinite(top) || !std::isfinite(width) ||
+      !std::isfinite(height)) {
+    invalid = true;
+  } else if (width <= 0.0f || height <= 0.0f) {
+    invalid = true;
+  }
+
+  // clamp coordinates into frame (clip); negative values saturate to 0
+  // (plain abs() would reflect them and also truncate via int abs(int))
+  float clamped_left =
+      std::max(0.0f, std::min(left, (float)frame_width - 1.0f));
+  float clamped_top =
+      std::max(0.0f, std::min(top, (float)frame_height - 1.0f));
+  float clamped_right =
+      std::max(0.0f, std::min(right, (float)frame_width - 1.0f));
+  float clamped_bottom =
+      std::max(0.0f, std::min(bottom, (float)frame_height - 1.0f));
+
+  float clamped_w = clamped_right - clamped_left;
+  float clamped_h = clamped_bottom - clamped_top;
+  if (clamped_w <= 0.0f || clamped_h <= 0.0f) {
+    invalid = true;
+  }
+  // Collapse invalid/degenerate boxes to empty so NaN is never written out.
+  if (invalid) {
+    clamped_w = 0.0f;
+    clamped_h = 0.0f;
+  }
+
+  rect_params->left = clamped_left;
+  rect_params->top = clamped_top;
+  rect_params->width = clamped_w;
+  rect_params->height = clamped_h;
+  return;
+}
+
 NvInferServerManager::Point2D
 NvInferServerManager::find_left_down_corner_shoulder(float *data, uint index) {
   Point2D left_down_shoulder;
+  // rightmost shoulder point in the BODY!
   if (data[index * BODY_TENSOR_SIZE + 21] >
       data[index * BODY_TENSOR_SIZE + 24]) {
     left_down_shoulder.x = data[index * BODY_TENSOR_SIZE + 21];
diff --git a/src/nv_infer_server_manager.hpp b/src/nv_infer_server_manager.hpp
index 3dc3d4d..66226d8 100644
--- a/src/nv_infer_server_manager.hpp
+++ b/src/nv_infer_server_manager.hpp
@@ -5,10 +5,14 @@
 
 #include "config_manager.hpp"
 // #include "gstnvdsinfer.h"
+#include <algorithm>
+#include <cmath>
+#include <string>
+
+#include "custom_gstnvdsinfer.hpp"
 #include "gstnvdsmeta.h"
 #include "nvds_version.h"
 #include "nvdsinfer_custom_impl.h"
-#include "custom_gstnvdsinfer.hpp"
 
 class NvInferServerManager {
  private:
@@ -53,4 +57,6 @@ class NvInferServerManager {
   static uint extract_tensor_metadata(NvDsUserMeta *, NvDsInferNetworkInfo,
                                       NvDsBatchMeta *, NvDsFrameMeta *);
   static Point2D find_left_down_corner_shoulder(float *, uint);
+  static void clamp_rect_params(NvDsFrameMeta *, NvOSD_RectParams *,
+                                std::string);
 };
\ No newline at end of file