From 31ff82123bdafa5c35a7cba368b10f705c40961a Mon Sep 17 00:00:00 2001 From: Barzan Hayati Date: Tue, 23 Sep 2025 19:23:59 +0000 Subject: [PATCH] Modify face warp --- face_post_process/face_warp/1/model.py | 74 ++++++++++++++++++-------- face_post_process/test.py | 4 +- face_post_process/test2.py | 4 +- 3 files changed, 54 insertions(+), 28 deletions(-) diff --git a/face_post_process/face_warp/1/model.py b/face_post_process/face_warp/1/model.py index a9bb759..07d4ed1 100644 --- a/face_post_process/face_warp/1/model.py +++ b/face_post_process/face_warp/1/model.py @@ -23,7 +23,7 @@ Notes: to 112x112 output width/height; matches typical ArcFace preprocessing. """ -import os +# import os import json import numpy as np import cv2 @@ -31,10 +31,19 @@ import cv2 import triton_python_backend_utils as pb_utils +# import logging + +# # Put this at the top of your script or inside initialize() +# logging.basicConfig(level=logging.INFO) +# logger = logging.getLogger(__name__) + + # --------------------------------------------------------------------------- # # Utility: build canonical destination template once and reuse # # --------------------------------------------------------------------------- # -def _canonical_template(output_w: int, output_h: int, scale_factor: float) -> np.ndarray: +def _canonical_template( + output_w: int, output_h: int, scale_factor: float +) -> np.ndarray: """ Compute canonical destination 5-point template scaled to the desired output size and zoomed by `scale_factor`. @@ -72,16 +81,16 @@ def _estimate_affine(src_kps: np.ndarray, dst_kps: np.ndarray) -> np.ndarray: Uses cv2.estimateAffinePartial2D with LMEDS for robustness. """ - # cv2 expects shape (N,2). Ensure contiguous float32. M, _ = cv2.estimateAffinePartial2D(src_kps, dst_kps, method=cv2.LMEDS) if M is None: # Fallback: identity with translation to keep image valid. - M = np.array([[1.0, 0.0, 0.0], - [0.0, 1.0, 0.0]], dtype=np.float32) + M = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype=np.float32) return M.astype(np.float32) -def _warp_image_nchw(img_chw: np.ndarray, M: np.ndarray, out_w: int, out_h: int) -> np.ndarray: +def _warp_image_nchw( + img_chw: np.ndarray, M: np.ndarray, out_w: int, out_h: int +) -> np.ndarray: """ Warp a single NCHW FP32 image using affine matrix M into out size W,H. @@ -90,11 +99,17 @@ def _warp_image_nchw(img_chw: np.ndarray, M: np.ndarray, out_w: int, out_h: int) M: (2,3) float32 out_w, out_h: ints + Returns: (3,out_h,out_w) float32 aligned image. """ + # logger.info(f"shape of image is: {img_chw.shape}, type of image: {img_chw.dtype}, min: {img_chw.min()} , max is {img_chw.max()}") # Convert to HWC for cv2.warpAffine (expects HxW xC, BGR/RGB agnostic) img_hwc = np.transpose(img_chw, (1, 2, 0)) # H,W,C + img_hwc = ((img_hwc + 1.0) * 127.5).clip(0, 255).astype(np.uint8) + # Ithink input is between -1 to 1, so we change it to 0 , 255 uint + # img_hwc = ((img_hwc + 1) * 127.5).astype(np.uint8) + # cv2.imwrite('/models/input_of_warp.jpg', img_hwc) warped = cv2.warpAffine( img_hwc, M, @@ -102,9 +117,17 @@ def _warp_image_nchw(img_chw: np.ndarray, M: np.ndarray, out_w: int, out_h: int) flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE, ) + # make it bgr: + # warped = warped[..., ::-1] + # logger.info(f"shape of warped is: {warped.shape}, type of image: {warped.dtype}, min: {warped.min()} , max is {warped.max()}") + # warped.astype(np.float32) # Back to NCHW - warped_chw = np.transpose(warped, (2, 0, 1)) - return warped_chw.astype(np.float32) + # cv2.imwrite('/models/warped.jpg', warped) + warped = np.transpose(warped, (2, 0, 1)) + warped = ((warped.astype(np.float32) / 255.0) - 0.5) / 0.5 + # warped = ((warped /warped.max()) - 0.5) / 0.5 + # logger.info(f"after preproces for embeding: shape of warped is: {warped.shape}, type of image: {warped.dtype}, min: {warped.min()} , max is {warped.max()}") + return warped class TritonPythonModel: @@ -117,9 +140,11 @@ class TritonPythonModel: Called once when the model is loaded. """ # Parse model config to get default scale factor (if provided). - model_config = json.loads(args['model_config']) - params = model_config.get('parameters', {}) - self.default_scale = float(params.get('scale_factor', {}).get('string_value', '1.0')) + model_config = json.loads(args["model_config"]) + params = model_config.get("parameters", {}) + self.default_scale = float( + params.get("scale_factor", {}).get("string_value", "1.0") + ) # Output dimensions from config; we assume fixed 112x112. # (We could parse from config but we'll hardcode to match pbtxt.) @@ -127,7 +152,7 @@ class TritonPythonModel: self.out_h = 112 # Precompute base canonical template for default scale (will adjust per‑sample if needed). - self.base_template = _canonical_template(self.out_w, self.out_h, 1.0) + self.base_template = _canonical_template(self.out_w, self.out_h, 0.93) self.embeding_model_name = "face_embeding" def execute(self, requests): @@ -135,16 +160,14 @@ class TritonPythonModel: for request in requests: # ---- Fetch tensors ---- + # print("hi, new sample") in_img_tensor = pb_utils.get_input_tensor_by_name(request, "input") in_lmk_tensor = pb_utils.get_input_tensor_by_name(request, "landmarks") score_tensor = pb_utils.get_input_tensor_by_name(request, "score") - - imgs = in_img_tensor.as_numpy() # [B,3,160,160] - lmks = in_lmk_tensor.as_numpy() # [B,5,2] - scores = score_tensor.as_numpy() # [B,1] - - + imgs = in_img_tensor.as_numpy() # [B,3,160,160] + lmks = in_lmk_tensor.as_numpy() # [B,5,2] + scores = score_tensor.as_numpy() # [B,1] # Ensure batch dimension if imgs.ndim == 3: @@ -168,11 +191,15 @@ class TritonPythonModel: if score < 0.9: continue # Skip, leave embedding as zero src_img = imgs[i] - src_kps = lmks[i].astype(np.float32) + src_kps = lmks[i].astype(np.float32) * 160 # Align dst_kps = self.base_template + M = _estimate_affine(src_kps, dst_kps) + # logger.info(f"src_kps(input): {src_kps}") + # logger.info(f"dst_kps(grandtruth): {dst_kps}") + # logger.info(f"M is : {M}") warped = _warp_image_nchw(src_img, M, self.out_w, self.out_h) aligned_imgs.append(warped) @@ -182,17 +209,20 @@ class TritonPythonModel: if aligned_imgs: aligned_batch = np.stack(aligned_imgs) # shape: [valid_N, 3, 112, 112] + # logger.info(f"shape of input of embeding batch : {aligned_batch.shape}, type of image: {aligned_batch.dtype}, min: {aligned_batch.min()} , max is {aligned_batch.max()}") infer_input = pb_utils.Tensor("input", aligned_batch) inference_request = pb_utils.InferenceRequest( model_name=self.embeding_model_name, requested_output_names=["output"], - inputs=[infer_input] + inputs=[infer_input], ) inference_response = inference_request.exec() embedding_tensor_list = inference_response.output_tensors() - responses.append(pb_utils.InferenceResponse(output_tensors=embedding_tensor_list)) + responses.append( + pb_utils.InferenceResponse(output_tensors=embedding_tensor_list) + ) return responses @@ -200,4 +230,4 @@ class TritonPythonModel: """ Called when the model is being unloaded. Nothing to clean up here. """ - return \ No newline at end of file + return diff --git a/face_post_process/test.py b/face_post_process/test.py index 132c15f..decc130 100644 --- a/face_post_process/test.py +++ b/face_post_process/test.py @@ -17,9 +17,7 @@ output_tensors = [httpclient.InferRequestedOutput(name) for name in output_names # Send inference request response = client.infer( - model_name="face_recognition", - inputs=[input_tensor], - outputs=output_tensors + model_name="face_recognition", inputs=[input_tensor], outputs=output_tensors ) # Parse and print outputs diff --git a/face_post_process/test2.py b/face_post_process/test2.py index f28a47e..82710d9 100644 --- a/face_post_process/test2.py +++ b/face_post_process/test2.py @@ -37,9 +37,7 @@ output_tensors = [httpclient.InferRequestedOutput(name) for name in output_names # Send inference request response = client.infer( - model_name="face_recognition", - inputs=[input_tensor], - outputs=output_tensors + model_name="face_recognition", inputs=[input_tensor], outputs=output_tensors ) # -----------------------------