Extract tighter face bounding box

This commit is contained in:
Barzan Hayati 2025-08-30 22:35:38 +00:00
parent 51e4265ce8
commit 3b26a65f8a
2 changed files with 330 additions and 208 deletions

View File

@ -89,6 +89,85 @@ void NvTrackerManager::attach_probe_to_element() {
gst_object_unref(src_pad);
}
/**
 * Derive a head bounding box (face + ears, down to the shoulders) for one
 * person from its pose landmarks.
 *
 * Only the first record of `user_meta_data` is read, with this layout:
 *   [0..3]            person box as x, y, width, height
 *   [6 + 3*k + 0/1/2] x, y, score of keypoint k, where k is
 *                     0 nose, 1 left eye, 2 right eye, 3 left ear,
 *                     4 right ear, 5 left shoulder, 6 right shoulder
 *
 * @param user_meta_data  Landmark buffer for one person; may be null.
 * @return ((x_min, y_top, x_max, y_bottom), avg_score), where avg_score is the
 *         mean of the nose and both eye scores. std::nullopt when the buffer
 *         is null, when nose or either eye is not above the score threshold,
 *         or when the clipped box has no positive area.
 */
std::optional<std::tuple<std::tuple<float, float, float, float>, float>>
NvTrackerManager::face_box_extract(float *user_meta_data) {
    // Offsets into the landmark record (see layout above).
    enum : int {
        kBoxX = 0,
        kBoxY = 1,
        kBoxWidth = 2,
        kBoxHeight = 3,
        kNoseScore = 8,
        kLeftEyeX = 9,
        kLeftEyeScore = 11,
        kRightEyeX = 12,
        kRightEyeScore = 14,
        kLeftEarX = 15,
        kLeftEarScore = 17,
        kRightEarX = 18,
        kRightEarScore = 20,
        kLeftShoulderY = 22,
        kRightShoulderY = 25
    };
    constexpr float kFaceScoreThreshold = 0.5f;
    constexpr float kHorizontalPadding = 0.2f;

    if (user_meta_data == nullptr) {
        return std::nullopt;
    }

    // Step 1: a face is present only if nose and both eyes are confident.
    // All three landmarks are held to the same threshold.
    const float nose_score = user_meta_data[kNoseScore];
    const float left_eye_score = user_meta_data[kLeftEyeScore];
    const float right_eye_score = user_meta_data[kRightEyeScore];
    if (!(nose_score > kFaceScoreThreshold &&
          left_eye_score > kFaceScoreThreshold &&
          right_eye_score > kFaceScoreThreshold)) {
        return std::nullopt;
    }
    const float avg_score =
        (nose_score + left_eye_score + right_eye_score) / 3.0f;

    // Step 2: person bounding box, converted from x/y/w/h to corners.
    const float x1_box = user_meta_data[kBoxX];
    const float y1_box = user_meta_data[kBoxY];
    const float x2_box = x1_box + user_meta_data[kBoxWidth];
    const float y2_box = y1_box + user_meta_data[kBoxHeight];

    // Step 3: horizontal bounds. Prefer the ear when it is confident enough,
    // otherwise fall back to the eye on the same side.
    const float x_left = (user_meta_data[kLeftEarScore] > THRESHOLD_LANDMARKS)
                             ? user_meta_data[kLeftEarX]
                             : user_meta_data[kLeftEyeX];
    const float x_right =
        (user_meta_data[kRightEarScore] > THRESHOLD_LANDMARKS)
            ? user_meta_data[kRightEarX]
            : user_meta_data[kRightEyeX];
    float x_min = std::min(x_left, x_right);
    float x_max = std::max(x_left, x_right);
    // Compute the margin once from the unpadded width so both sides grow by
    // the same amount.
    const float margin = kHorizontalPadding * (x_max - x_min);
    x_min -= margin;
    x_max += margin;

    // Step 4: vertical bounds, from the top of the person box down to the
    // lower of the two shoulders.
    float y_top = y1_box;
    float y_bottom = std::max(user_meta_data[kLeftShoulderY],
                              user_meta_data[kRightShoulderY]);

    // Clip to the person bounding box (and to the top edge of the frame).
    x_min = std::max(x_min, x1_box);
    x_max = std::min(x_max, x2_box);
    y_top = std::max(y_top, 0.0f);
    y_bottom = std::min(y_bottom, y2_box);

    // Callers turn the corners directly into a rectangle width/height, so a
    // box that collapsed or inverted after clipping is not a usable face.
    if (x_max <= x_min || y_bottom <= y_top) {
        return std::nullopt;
    }

    return std::make_tuple(std::make_tuple(x_min, y_top, x_max, y_bottom),
                           avg_score);
}
/* This is the buffer probe function that we have registered on the sink pad
* of the OSD element. All the infer elements in the pipeline shall attach
* their metadata to the GstBuffer, here we will iterate & process the metadata
@ -96,10 +175,10 @@ void NvTrackerManager::attach_probe_to_element() {
GstPadProbeReturn NvTrackerManager::tracker_src_pad_buffer_probe(
GstPad *pad, GstPadProbeInfo *info, gpointer u_data) {
(void)pad;
// (void)u_data;
(void)u_data;
// Cast user_data back to NvTrackerManager*
NvTrackerManager *manager = static_cast<NvTrackerManager *>(u_data);
// NvTrackerManager *manager = static_cast<NvTrackerManager *>(u_data);
GstBuffer *buf = (GstBuffer *)info->data;
guint num_rects = 0;
@ -141,7 +220,7 @@ GstPadProbeReturn NvTrackerManager::tracker_src_pad_buffer_probe(
NvDsUserMeta *user_meta = NULL;
NvDsMetaList *l_user_meta = NULL;
float *user_meta_data = NULL;
uint index = 0;
// uint index = 0;
for (l_user_meta = obj_meta->obj_user_meta_list;
l_user_meta != NULL; l_user_meta = l_user_meta->next) {
user_meta = (NvDsUserMeta *)(l_user_meta->data);
@ -175,216 +254,254 @@ GstPadProbeReturn NvTrackerManager::tracker_src_pad_buffer_probe(
// obj_meta->object_id = UNTRACKED_OBJECT_ID;
// obj_meta->class_id = 0;
if (!(user_meta_data[index * 57 + 8] >
THRESHOLD_LANDMARKS &&
user_meta_data[index * 57 + 11] >
THRESHOLD_LANDMARKS &&
user_meta_data[index * 57 + 14] >
THRESHOLD_LANDMARKS &&
user_meta_data[index * 57 + 17] >
THRESHOLD_LANDMARKS &&
user_meta_data[index * 57 + 20] >
THRESHOLD_LANDMARKS &&
user_meta_data[index * 57 + 23] >
THRESHOLD_LANDMARKS &&
user_meta_data[index * 57 + 26] >
THRESHOLD_LANDMARKS)) {
continue;
}
// NvOSD_RectParams &face_rect_params;
// NvOSD_RectParams *face_rect_params = nullptr; // Fill
// face_rect_params.top, .left, .width, .height
NvOSD_RectParams *face_rect_params = new NvOSD_RectParams();
face_rect_params->left = user_meta_data[index * 57 + 0];
face_rect_params->top = user_meta_data[index * 57 + 1];
/* Assign bounding box coordinates. */
// Right Shoulder - Left Shoulder
if (user_meta_data[index * 57 + 24] >
user_meta_data[index * 57 + 21]) {
face_rect_params->width =
abs((user_meta_data[index * 57 + 24] -
user_meta_data[index * 57 + 0]) *
MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
} else {
face_rect_params->width =
abs((user_meta_data[index * 57 + 21] -
user_meta_data[index * 57 + 0]) *
MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
}
if (user_meta_data[index * 57 + 25] >
user_meta_data[index * 57 + 22]) {
face_rect_params->height =
abs((user_meta_data[index * 57 + 25] -
user_meta_data[index * 57 + 1]) *
MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
} else {
face_rect_params->height =
abs((user_meta_data[index * 57 + 22] -
user_meta_data[index * 57 + 1]) *
MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
}
NvDsObjectMeta *face_obj =
nvds_acquire_obj_meta_from_pool(batch_meta);
face_obj->unique_component_id =
IMPRECISE_FACE_COMPONENT_ID; // 1; // Use a new
// component ID
face_obj->rect_params = *face_rect_params;
face_obj->rect_params.has_bg_color = 0;
face_obj->rect_params.border_width = 2;
face_obj->rect_params.border_color =
NvOSD_ColorParams{0.0, 0.0, 1.0, 1.0}; // Blue box
// FaceCandidate *face_candidate = new FaceCandidate();
FaceCandidTrace::FaceCandidate *face_candidate =
new FaceCandidTrace::FaceCandidate();
// NvTrackerManager::FaceCandidate* face_candidate = new
// NvTrackerManager::FaceCandidate();
// manager->face_candidate
face_candidate->frame_number = frame_meta->frame_num;
face_candidate->h = face_rect_params->height;
face_candidate->w = face_rect_params->width;
face_candidate->face_score = manager->get_face_score(
user_meta_data); // manager->face_score;
face_obj->confidence = face_candidate->face_score; // 1.0;
face_candidate->object_id = obj_meta->object_id;
face_obj->object_id = obj_meta->object_id;
face_candidate->source_id = frame_meta->source_id;
bool add_status = face_candidate_trace->add(face_candidate);
if (add_status) {
face_obj->class_id = FACE_CLASS_ID;
} else {
face_obj->class_id = 41;
}
NvOSD_TextParams &text_params = face_obj->text_params;
NvOSD_RectParams &rect_params = face_obj->rect_params;
/* display_text requires heap allocated memory. */
// Instead of letting OSD auto-generate text, set your own
text_params.display_text = g_strdup_printf(
"ImpreciseFace_Tracker %lu", face_obj->object_id);
// printf("Imprecise Face ID: %lu, Precise Face ID: %lu\n",
// obj_meta->object_id, final_face_obj->object_id);
/* Display text above the left top corner of the object.*/
text_params.x_offset = rect_params.left;
text_params.y_offset = rect_params.top + 30;
/* Set black background for the text. */
text_params.set_bg_clr = 1;
text_params.text_bg_clr = NvOSD_ColorParams{0, 0, 0, 1};
/* Font face, size and color. */
text_params.font_params.font_name = (gchar *)"Serif";
text_params.font_params.font_size = 11;
text_params.font_params.font_color =
NvOSD_ColorParams{1, 1, 1, 1};
// std::cout << "In Tracker sink "
// << " source_id = " << frame_meta->source_id
// << " object_id = " << obj_meta->object_id
// << " x = " << obj_meta->rect_params.left
// << " y = " << obj_meta->rect_params.top
// << " w = " << obj_meta->rect_params.width
// << " h = " << obj_meta->rect_params.height
// << " score = " << obj_meta->confidence
// << std::endl;
// bool is_area_updated = false;
// FaceBody current_face;
// current_face.largest_area = face_obj->rect_params.height
// *
// face_obj->rect_params.width;
// current_face.object_id = obj_meta->object_id;
// current_face.source_id = frame_meta->source_id;
// if (!check_existence(
// obj_meta->object_id, current_face.source_id,
// current_face.largest_area, &is_area_updated)) {
// current_face.num_frames = 1;
// body_face_list.push_back(current_face);
// std::cout << "source_id = " << current_face.source_id
// << " frame_num = " << frame_meta->frame_num
// << " object_id = " << obj_meta->object_id
// << " size body_face_list = "
// << body_face_list.size() << std::endl;
// face_obj->class_id = FACE_CLASS_ID;
// }
// if (is_area_updated) {
// face_obj->class_id = FACE_CLASS_ID;
// std::cout << "source_id = " << current_face.source_id
// << " frame_num = " << frame_meta->frame_num
// << " object_id = " << obj_meta->object_id
// << " area is updated" << std::endl;
// } else {
// face_obj->class_id = 41;
// // std::cout<<"not is_area_updated "<< std::endl;
// if (!(user_meta_data[index * 57 + 8] >
// THRESHOLD_LANDMARKS &&
// user_meta_data[index * 57 + 11] >
// THRESHOLD_LANDMARKS &&
// user_meta_data[index * 57 + 14] >
// THRESHOLD_LANDMARKS &&
// user_meta_data[index * 57 + 17] >
// THRESHOLD_LANDMARKS &&
// user_meta_data[index * 57 + 20] >
// THRESHOLD_LANDMARKS &&
// user_meta_data[index * 57 + 23] >
// THRESHOLD_LANDMARKS &&
// user_meta_data[index * 57 + 26] >
// THRESHOLD_LANDMARKS)) {
// continue;
// }
// NvOSD_RectParams &rect_params = obj_meta->rect_params;
// NvOSD_TextParams &text_params = obj_meta->text_params;
/* Assign bounding box coordinates. */
// rect_params.left = int(data[index * 57 + 0] *
// MUXER_OUTPUT_WIDTH /
// PGIE_NET_WIDTH);
// rect_params.top = int(data[index * 57 + 1] *
// MUXER_OUTPUT_HEIGHT /
// PGIE_NET_HEIGHT);
// rect_params.width =
// int((data[index * 57 + 2] - data[index * 57 + 0]) *
// MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
// rect_params.height =
// int((data[index * 57 + 3] - data[index * 57 + 1]) *
// MUXER_OUTPUT_HEIGHT / PGIE_NET_HEIGHT);
auto result = face_box_extract(user_meta_data);
if (result.has_value()) {
// Extract the actual tuple from the optional
auto face_data = result.value();
// Now extract components from the tuple
auto &[x1, y1, x2, y2] = std::get<0>(face_data);
float &confidence = std::get<1>(face_data);
// std::cout << "nvinferserver second for x = " <<
// rect_params.left
// << " y = " << rect_params.top
// << " w = " << rect_params.width
// << " h = " << rect_params.height
// << " score = " << obj_meta->confidence <<
// std::endl;
// NvOSD_RectParams &face_rect_params;
// NvOSD_RectParams *face_rect_params = nullptr; // Fill
// face_rect_params.top, .left, .width, .height
NvOSD_RectParams *face_rect_params =
new NvOSD_RectParams();
// /* Border of width 3. */
// rect_params.border_width = 3;
// rect_params.has_bg_color = 0;
// rect_params.border_color = NvOSD_ColorParams{1, 0, 0, 1};
// /* display_text requires heap allocated memory. */
// text_params.display_text = g_strdup(pgie_class_str[0]);
// /* Display text above the left top corner of the object.
// */ text_params.x_offset = rect_params.left;
// text_params.y_offset = rect_params.top - 10;
// /* Set black background for the text. */
// text_params.set_bg_clr = 1;
// text_params.text_bg_clr = NvOSD_ColorParams{0, 0, 0, 1};
// /* Font face, size and color. */
// text_params.font_params.font_name = (gchar *)"Serif";
// text_params.font_params.font_size = 11;
// text_params.font_params.font_color =
// NvOSD_ColorParams{1, 1, 1, 1};
// adding landmarks to obj_meta as user_meta
// nvds_add_child_object(obj_meta, face_obj);
// nvds_attach_obj_meta(obj_meta, face_obj, NULL);
face_rect_params->left =
x1; // user_meta_data[index * 57 + 0];
face_rect_params->top =
y1; // user_meta_data[index * 57 + 1];
// NvDsUserMeta *um1 =
// nvds_acquire_user_meta_from_pool(batch_meta);
// um1->user_meta_data =
// set_metadata_ptr(&(data[index * 57 + 6]),
// source_id); // Add landmarks
// here
// um1->base_meta.meta_type =
// NVDS_USER_OBJECT_META_LANDMARKS_AND_SOURCE_ID;
// um1->base_meta.copy_func =
// (NvDsMetaCopyFunc)copy_user_meta;
// um1->base_meta.release_func =
// (NvDsMetaReleaseFunc)release_user_meta;
// nvds_add_user_meta_to_obj(obj_meta, um1);
// nvds_add_obj_meta_to_frame(frame_meta, obj_meta, NULL);
nvds_add_obj_meta_to_frame(frame_meta, face_obj, obj_meta);
/* Assign bounding box coordinates. */
// Right Shoulder - Left Shoulder
// if (user_meta_data[index * 57 + 24] >
// user_meta_data[index * 57 + 21]) {
// face_rect_params->width =
// abs((user_meta_data[index * 57 + 24] -
// user_meta_data[index * 57 + 0]) *
// MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
// } else {
// face_rect_params->width =
// abs((user_meta_data[index * 57 + 21] -
// user_meta_data[index * 57 + 0]) *
// MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
// }
// if (user_meta_data[index * 57 + 25] >
// user_meta_data[index * 57 + 22]) {
// face_rect_params->height =
// abs((user_meta_data[index * 57 + 25] -
// user_meta_data[index * 57 + 1]) *
// MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
// } else {
// face_rect_params->height =
// abs((user_meta_data[index * 57 + 22] -
// user_meta_data[index * 57 + 1]) *
// MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
// }
face_rect_params->width = x2 - x1;
face_rect_params->height = y2 - y1;
NvDsObjectMeta *face_obj =
nvds_acquire_obj_meta_from_pool(batch_meta);
face_obj->unique_component_id =
IMPRECISE_FACE_COMPONENT_ID; // 1; // Use a new
// component ID
face_obj->rect_params = *face_rect_params;
face_obj->rect_params.has_bg_color = 0;
face_obj->rect_params.border_width = 2;
face_obj->rect_params.border_color =
NvOSD_ColorParams{0.0, 0.0, 1.0, 1.0}; // Blue box
// FaceCandidate *face_candidate = new FaceCandidate();
FaceCandidTrace::FaceCandidate *face_candidate =
new FaceCandidTrace::FaceCandidate();
// NvTrackerManager::FaceCandidate* face_candidate = new
// NvTrackerManager::FaceCandidate();
// manager->face_candidate
face_candidate->frame_number = frame_meta->frame_num;
face_candidate->h = face_rect_params->height;
face_candidate->w = face_rect_params->width;
face_candidate->face_score =
confidence; // manager->get_face_score(user_meta_data);
// // manager->face_score;
face_obj->confidence =
face_candidate->face_score; // 1.0;
face_candidate->object_id = obj_meta->object_id;
face_obj->object_id = obj_meta->object_id;
face_candidate->source_id = frame_meta->source_id;
bool add_status =
face_candidate_trace->add(face_candidate);
if (add_status) {
face_obj->class_id = FACE_CLASS_ID;
} else {
face_obj->class_id = 41;
}
NvOSD_TextParams &text_params = face_obj->text_params;
NvOSD_RectParams &rect_params = face_obj->rect_params;
/* display_text requires heap allocated memory. */
// Instead of letting OSD auto-generate text, set your
// own
text_params.display_text = g_strdup_printf(
"ImpreciseFace_Tracker %lu", face_obj->object_id);
// printf("Imprecise Face ID: %lu, Precise Face ID:
// %lu\n",
// obj_meta->object_id, final_face_obj->object_id);
/* Display text above the left top corner of the
* object.*/
text_params.x_offset = rect_params.left;
text_params.y_offset = rect_params.top + 30;
/* Set black background for the text. */
text_params.set_bg_clr = 1;
text_params.text_bg_clr = NvOSD_ColorParams{0, 0, 0, 1};
/* Font face, size and color. */
text_params.font_params.font_name = (gchar *)"Serif";
text_params.font_params.font_size = 11;
text_params.font_params.font_color =
NvOSD_ColorParams{1, 1, 1, 1};
// std::cout << "In Tracker sink "
// << " source_id = " << frame_meta->source_id
// << " object_id = " << obj_meta->object_id
// << " x = " << obj_meta->rect_params.left
// << " y = " << obj_meta->rect_params.top
// << " w = " << obj_meta->rect_params.width
// << " h = " << obj_meta->rect_params.height
// << " score = " << obj_meta->confidence
// << std::endl;
// bool is_area_updated = false;
// FaceBody current_face;
// current_face.largest_area =
// face_obj->rect_params.height
// *
// face_obj->rect_params.width;
// current_face.object_id = obj_meta->object_id;
// current_face.source_id = frame_meta->source_id;
// if (!check_existence(
// obj_meta->object_id, current_face.source_id,
// current_face.largest_area, &is_area_updated))
// {
// current_face.num_frames = 1;
// body_face_list.push_back(current_face);
// std::cout << "source_id = " <<
// current_face.source_id
// << " frame_num = " <<
// frame_meta->frame_num
// << " object_id = " <<
// obj_meta->object_id
// << " size body_face_list = "
// << body_face_list.size() << std::endl;
// face_obj->class_id = FACE_CLASS_ID;
// }
// if (is_area_updated) {
// face_obj->class_id = FACE_CLASS_ID;
// std::cout << "source_id = " <<
// current_face.source_id
// << " frame_num = " <<
// frame_meta->frame_num
// << " object_id = " <<
// obj_meta->object_id
// << " area is updated" << std::endl;
// } else {
// face_obj->class_id = 41;
// // std::cout<<"not is_area_updated "<< std::endl;
// }
// NvOSD_RectParams &rect_params =
// obj_meta->rect_params; NvOSD_TextParams &text_params
// = obj_meta->text_params;
/* Assign bounding box coordinates. */
// rect_params.left = int(data[index * 57 + 0] *
// MUXER_OUTPUT_WIDTH /
// PGIE_NET_WIDTH);
// rect_params.top = int(data[index * 57 + 1] *
// MUXER_OUTPUT_HEIGHT /
// PGIE_NET_HEIGHT);
// rect_params.width =
// int((data[index * 57 + 2] - data[index * 57 + 0])
// *
// MUXER_OUTPUT_WIDTH / PGIE_NET_WIDTH);
// rect_params.height =
// int((data[index * 57 + 3] - data[index * 57 + 1])
// *
// MUXER_OUTPUT_HEIGHT / PGIE_NET_HEIGHT);
// std::cout << "nvinferserver second for x = " <<
// rect_params.left
// << " y = " << rect_params.top
// << " w = " << rect_params.width
// << " h = " << rect_params.height
// << " score = " << obj_meta->confidence <<
// std::endl;
// /* Border of width 3. */
// rect_params.border_width = 3;
// rect_params.has_bg_color = 0;
// rect_params.border_color = NvOSD_ColorParams{1, 0, 0,
// 1};
// /* display_text requires heap allocated memory. */
// text_params.display_text =
// g_strdup(pgie_class_str[0]);
// /* Display text above the left top corner of the
// object.
// */ text_params.x_offset = rect_params.left;
// text_params.y_offset = rect_params.top - 10;
// /* Set black background for the text. */
// text_params.set_bg_clr = 1;
// text_params.text_bg_clr = NvOSD_ColorParams{0, 0, 0,
// 1};
// /* Font face, size and color. */
// text_params.font_params.font_name = (gchar *)"Serif";
// text_params.font_params.font_size = 11;
// text_params.font_params.font_color =
// NvOSD_ColorParams{1, 1, 1, 1};
// adding landmarks to obj_meta as user_meta
// nvds_add_child_object(obj_meta, face_obj);
// nvds_attach_obj_meta(obj_meta, face_obj, NULL);
// NvDsUserMeta *um1 =
// nvds_acquire_user_meta_from_pool(batch_meta);
// um1->user_meta_data =
// set_metadata_ptr(&(data[index * 57 + 6]),
// source_id); // Add landmarks
// here
// um1->base_meta.meta_type =
// NVDS_USER_OBJECT_META_LANDMARKS_AND_SOURCE_ID;
// um1->base_meta.copy_func =
// (NvDsMetaCopyFunc)copy_user_meta;
// um1->base_meta.release_func =
// (NvDsMetaReleaseFunc)release_user_meta;
// nvds_add_user_meta_to_obj(obj_meta, um1);
// nvds_add_obj_meta_to_frame(frame_meta, obj_meta,
// NULL);
nvds_add_obj_meta_to_frame(frame_meta, face_obj,
obj_meta);
}
}
// index++;
}

View File

@ -4,6 +4,8 @@
#include <fstream>
#include <iostream>
#include <iterator>
#include <optional>
#include <tuple> // for std::tuple
#include <vector>
#include "config_manager.hpp"
@ -52,4 +54,7 @@ class NvTrackerManager {
gpointer);
static bool check_existence(int, int, float, bool *);
float get_face_score(float *);
// Builds a head bounding box from a pose-landmark float buffer.
// Returns ((x_min, y_top, x_max, y_bottom), avg_score), where avg_score is the
// mean of the nose and eye scores, or std::nullopt when the face-presence
// check on those landmarks fails.
static std::optional<
std::tuple<std::tuple<float, float, float, float>, float>>
face_box_extract(float *);
};