Fix segmentation fault by adjusting body/face locations

Barzan Hayati 2025-09-26 22:28:01 +00:00
parent 6286841117
commit 8334c1806a
2 changed files with 97 additions and 6 deletions

View File

@ -231,9 +231,6 @@ GstPadProbeReturn NvInferServerManager::pgie_pad_buffer_probe(
l_frame = l_frame->next) {
NvDsFrameMeta *frame_meta = (NvDsFrameMeta *)l_frame->data;
// std::cout << "frame number = " << frame_meta->frame_num
// << " frame id = " << frame_meta->source_id << std::endl;
// To fix the issue of tracks not showing up
nvds_acquire_meta_lock(batch_meta);
frame_meta->bInferDone = TRUE;
@ -241,10 +238,45 @@ GstPadProbeReturn NvInferServerManager::pgie_pad_buffer_probe(
if (*use_new_mux) {
stream_width = frame_meta->source_frame_width;
stream_height = frame_meta->source_frame_height;
// These values come from the actual source (decoder) frame size
// before batching/muxing, i.e. the native resolution of the incoming
// camera/RTSP/file stream. If there are multiple sources with
// different resolutions, these values can differ per source/frame.
// Use them when the original stream resolution is needed (e.g., for
// scaling bounding boxes back to source coordinates).
} else {
stream_width = MUXER_OUTPUT_WIDTH;
stream_height = MUXER_OUTPUT_HEIGHT;
// These are the dimensions configured in nvstreammux (width /
// height). All sources fed into the muxer get scaled/padded to this
// resolution before being passed to downstream elements (like
// PGIE). So PGIE always "sees" frames at muxer resolution, not the
// raw input resolution. Use them when the effective frame size that
// PGIE is processing is needed (i.e., what TensorRT sees).
}
// Inside PGIE (nvinfer), the correct dimensions are the muxer output
// width/height, because frames are resized by nvstreammux before
// inference. If the original camera resolution is needed, use
// frame_meta->source_frame_width / source_frame_height.
// nvmultiurisrcbin internally creates an nvstreammux before sending
// buffers downstream. That means by the time PGIE sees frames, they
// are already scaled to the muxer's output size.
// At PGIE input, the frame resolution is the muxer's configured
// output size. Therefore the correct dimensions for PGIE are:
// stream_width = MUXER_OUTPUT_WIDTH; stream_height =
// MUXER_OUTPUT_HEIGHT. Why not frame_meta->source_frame_width? Those
// fields still exist in frame_meta, but they represent the original
// source stream resolution (camera/file). Since PGIE never directly
// sees that resolution (it only sees muxed frames), using those
// values inside PGIE would be misleading.
// For this pipeline, use MUXER_OUTPUT_WIDTH and MUXER_OUTPUT_HEIGHT
// to represent what PGIE actually processes. If detections later
// need to be mapped back to the original stream resolution (e.g.,
// for saving cropped images or re-streaming), use
// frame_meta->source_frame_width and source_frame_height for the
// scaling (see the mapping sketch below this hunk).
(void)stream_height;
(void)stream_width;
uint detected_persons = 0;
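
A minimal sketch of the mapping mentioned in the comment above, for when detections must be taken back from muxer-output coordinates to the original stream resolution. It assumes nvstreammux plainly scales each source to MUXER_OUTPUT_WIDTH x MUXER_OUTPUT_HEIGHT (no letterboxing); the helper name map_rect_to_source is illustrative and not part of this commit.

// Illustrative helper (not in this commit): map a rect from muxer-output
// coordinates back to the source resolution stored in frame_meta.
static void map_rect_to_source(const NvDsFrameMeta *frame_meta,
                               NvOSD_RectParams *rect) {
    const float sx =
        (float)frame_meta->source_frame_width / (float)MUXER_OUTPUT_WIDTH;
    const float sy =
        (float)frame_meta->source_frame_height / (float)MUXER_OUTPUT_HEIGHT;
    rect->left *= sx;    // scale x position
    rect->top *= sy;     // scale y position
    rect->width *= sx;   // scale box width
    rect->height *= sy;  // scale box height
}
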
@ -429,6 +461,8 @@ void NvInferServerManager::update_frame_with_face_body_meta(
((left_down_shoulder.y - data[index * BODY_TENSOR_SIZE + 1]) *
MUXER_OUTPUT_HEIGHT / PGIE_NET_HEIGHT);
clamp_rect_params(frame_meta, &rect_params_imprecise_face, "FACE");
/* Border of width 3. */
rect_params_imprecise_face.border_width = 3;
rect_params_imprecise_face.has_bg_color = 0;
@ -488,6 +522,8 @@ void NvInferServerManager::update_frame_with_face_body_meta(
data[index * BODY_TENSOR_SIZE + 1]) *
MUXER_OUTPUT_HEIGHT / PGIE_NET_HEIGHT);
clamp_rect_params(frame_meta, &rect_params_body, "BODY");
/* Border of width 3. */
rect_params_body.border_width = 3;
rect_params_body.has_bg_color = 0;
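
For orientation, the arithmetic above maps keypoint coordinates from the PGIE network input space to the muxer output space; a short sketch of the relation, assuming PGIE_NET_WIDTH / PGIE_NET_HEIGHT are the network input dimensions:

// Sketch (assumes PGIE_NET_WIDTH / PGIE_NET_HEIGHT are the net input dims):
// x_mux = x_net * MUXER_OUTPUT_WIDTH  / PGIE_NET_WIDTH;
// y_mux = y_net * MUXER_OUTPUT_HEIGHT / PGIE_NET_HEIGHT;
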
@ -498,8 +534,11 @@ void NvInferServerManager::update_frame_with_face_body_meta(
// text_params.display_text = g_strdup_printf("ImpreciseFace %lu",
// face_obj->object_id);
/* Display text above the left top corner of the object. */
text_params_body.x_offset = rect_params_body.left - 30;
text_params_body.y_offset = rect_params_body.top - 30;
text_params_body.x_offset =
(rect_params_body.left - 30 < 0) ? 0 : rect_params_body.left - 30;
text_params_body.y_offset =
(rect_params_body.top - 30 < 0) ? 0 : rect_params_body.top - 30;
/* Set black background for the text. */
text_params_body.set_bg_clr = 1;
text_params_body.text_bg_clr = NvOSD_ColorParams{0, 0, 0, 1};
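
The ternary above keeps the text offsets non-negative; the same clamp can be written with std::max (a sketch, not part of this commit, assuming the offset fields are unsigned integers):

// Equivalent non-negative clamp using std::max (sketch):
// text_params_body.x_offset = (guint)std::max(0.0f, rect_params_body.left - 30.0f);
// text_params_body.y_offset = (guint)std::max(0.0f, rect_params_body.top - 30.0f);
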
@ -522,9 +561,56 @@ void NvInferServerManager::update_frame_with_face_body_meta(
}
}
void NvInferServerManager::clamp_rect_params(NvDsFrameMeta *frame_meta,
NvOSD_RectParams *rect_params,
std::string type) {
(void)type;
guint frame_width = frame_meta->source_frame_width;
guint frame_height = frame_meta->source_frame_height;
// read values (DeepStream stores rect params as floats)
float left = rect_params->left;
float top = rect_params->top;
float width = rect_params->width;
float height = rect_params->height;
float right = left + width;
float bottom = top + height;
// CHECK for invalid numbers (NaN/inf) or out-of-bounds
bool invalid = false;
if (!std::isfinite(left) || !std::isfinite(top) || !std::isfinite(width) ||
!std::isfinite(height)) {
invalid = true;
} else if (width <= 0.0f || height <= 0.0f) {
invalid = true;
}
// clamp coordinates into frame (clip)
float clamped_left =
std::max(0.0f, std::min(left, (float)frame_width - 1.0f));
float clamped_top =
std::max(0.0f, std::min(top, (float)frame_height - 1.0f));
float clamped_right =
std::max(0.0f, std::min(right, (float)frame_width - 1.0f));
float clamped_bottom =
std::max(0.0f, std::min(bottom, (float)frame_height - 1.0f));
float clamped_w = clamped_right - clamped_left;
float clamped_h = clamped_bottom - clamped_top;
if (clamped_w <= 0.0f || clamped_h <= 0.0f) {
invalid = true;
}
// The validity flag is only computed here; the clamped values below
// are written back unconditionally.
(void)invalid;
rect_params->left = clamped_left;
rect_params->top = clamped_top;
rect_params->width = clamped_w;
rect_params->height = clamped_h;
return;
}
NvInferServerManager::Point2D
NvInferServerManager::find_left_down_corner_shoulder(float *data, uint index) {
Point2D left_down_shoulder;
// Pick the shoulder keypoint with the larger x coordinate, i.e. the
// rightmost shoulder in image coordinates.
if (data[index * BODY_TENSOR_SIZE + 21] >
data[index * BODY_TENSOR_SIZE + 24]) {
left_down_shoulder.x = data[index * BODY_TENSOR_SIZE + 21];
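
For reference, a self-contained sketch of how the validity flag computed in clamp_rect_params could be acted on, here by collapsing unusable rects to zero size so downstream OSD/crop code can skip them. The policy and the clamp_or_reject name are assumptions for illustration, not part of this commit.

#include <algorithm>
#include <cmath>

struct Rect { float left, top, width, height; };  // stand-in for NvOSD_RectParams

// Clamp a rect into the frame; zero it out and report false if it is unusable.
static bool clamp_or_reject(Rect *r, float frame_w, float frame_h) {
    if (!std::isfinite(r->left) || !std::isfinite(r->top) ||
        !std::isfinite(r->width) || !std::isfinite(r->height) ||
        r->width <= 0.0f || r->height <= 0.0f) {
        r->left = r->top = r->width = r->height = 0.0f;  // reject
        return false;
    }
    float right = r->left + r->width;
    float bottom = r->top + r->height;
    r->left = std::max(0.0f, std::min(r->left, frame_w - 1.0f));
    r->top = std::max(0.0f, std::min(r->top, frame_h - 1.0f));
    right = std::max(r->left, std::min(right, frame_w - 1.0f));
    bottom = std::max(r->top, std::min(bottom, frame_h - 1.0f));
    r->width = right - r->left;
    r->height = bottom - r->top;
    if (r->width <= 0.0f || r->height <= 0.0f) {
        r->left = r->top = r->width = r->height = 0.0f;  // degenerate after clamping
        return false;
    }
    return true;
}
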

View File

@ -5,10 +5,13 @@
#include "config_manager.hpp"
// #include "gstnvdsinfer.h"
#include <algorithm>
#include <cmath>
#include "custom_gstnvdsinfer.hpp"
#include "gstnvdsmeta.h"
#include "nvds_version.h"
#include "nvdsinfer_custom_impl.h"
#include "custom_gstnvdsinfer.hpp"
class NvInferServerManager {
private:
@ -53,4 +56,6 @@ class NvInferServerManager {
static uint extract_tensor_metadata(NvDsUserMeta *, NvDsInferNetworkInfo,
NvDsBatchMeta *, NvDsFrameMeta *);
static Point2D find_left_down_corner_shoulder(float *, uint);
static void clamp_rect_params(NvDsFrameMeta *, NvOSD_RectParams *,
std::string);
};