Skip to content

Commit ca2d1bd

Browse files
Fix/yolov8face roi padding #1014 (#1017)
* Fix: YoloV8face converter does not apply ROI padding (#1014) * Fixed README for age_gender_rec * Removed the yolov5face converter * Fixed the ROI coordinate annotations in the docstring: [top, left, ...] -> [left, top, ...]
1 parent e139084 commit ca2d1bd

File tree

3 files changed

+16
-86
lines changed

3 files changed

+16
-86
lines changed

samples/age_gender_recognition/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Faces detection, tracking and age-gender recognition (YoloV5face, Nvidia Tracker, Age-Gender model)
1+
# Face detection, tracking and age-gender recognition (YoloV8face, Nvidia Tracker, Age-Gender model)
22

33
**NB**: The demo uses the **YOLOV8-Face** model, which takes up to **30-40 minutes** to compile to a TensorRT engine, so the first launch takes a very long time.
44

savant/converter/yolo_v5face.py

Lines changed: 0 additions & 80 deletions
This file was deleted.

savant/converter/yolo_v8face.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __call__(
3939
:param output_layers: Output layer tensor
4040
:param model: Model definition, required parameters: input tensor shape,
4141
maintain_aspect_ratio
42-
:param roi: [top, left, width, height] of the rectangle
42+
:param roi: [left, top, width, height] of the rectangle
4343
on which the model infers
4444
:return: a combination of :py:class:`.BaseObjectModelOutputConverter` and
4545
:py:class:`.BaseAttributeModelOutputConverter` outputs:
@@ -51,8 +51,9 @@ def __call__(
5151
"""
5252
attr_name = model.output.attributes[0].name
5353

54-
ration_width = roi[2] / model.input.shape[2]
55-
ratio_height = roi[3] / model.input.shape[1]
54+
roi_left, roi_top, roi_width, roi_height = roi
55+
ratio_width = roi_width / model.input.shape[2]
56+
ratio_height = roi_height / model.input.shape[1]
5657

5758
raw_predictions = np.transpose(output_layers[0])
5859

@@ -76,13 +77,22 @@ def __call__(
7677
xywh = selected_nms_predictions[:, :4]
7778
conf = selected_nms_predictions[:, 4:5]
7879
class_num = np.zeros_like(conf)
79-
xywh *= np.tile(np.float32([ration_width, ratio_height]), 2)
80+
81+
# Scale and shift bbox coordinates
82+
xywh *= np.tile(np.float32([ratio_width, ratio_height]), 2)
83+
xywh[:, 0] += roi_left # x center
84+
xywh[:, 1] += roi_top # y center
85+
8086
bbox_output = np.concatenate((class_num, conf, xywh), axis=1)
8187

88+
# Process landmarks (5 points, each with x, y, conf)
8289
landmarks = (
8390
selected_nms_predictions[:, 5:20]
84-
* np.tile(np.float32([ration_width, ratio_height, 1.0]), 5)
91+
* np.tile(np.float32([ratio_width, ratio_height, 1.0]), 5)
8592
).reshape(-1, 5, 3)
93+
landmarks[:, :, 0] += roi_left # x
94+
landmarks[:, :, 1] += roi_top # y
95+
8696
landmarks_output = [
8797
[(attr_name, lms, conf)]
8898
for lms, conf in zip(

0 commit comments

Comments
 (0)