Commit b16f4fc

Merge pull request #1674 from roboflow/Update_Perspective_Correction_For_Dimensionality
Update perspective correction for dimensionality
2 parents 36b1cd2 + 2ace3e0 commit b16f4fc

File tree

2 files changed (+96, -18 lines)

inference/core/workflows/core_steps/transformations/perspective_correction/v1.py

Lines changed: 33 additions & 18 deletions
@@ -113,7 +113,11 @@ def get_parameters_accepting_batches(cls) -> List[str]:
 
     @classmethod
     def get_parameters_accepting_batches_and_scalars(cls) -> List[str]:
-        return ["perspective_polygons"]
+        return [
+            "perspective_polygons",
+            "transformed_rect_width",
+            "transformed_rect_height",
+        ]
 
     @classmethod
     def describe_outputs(cls) -> List[OutputDefinition]:
@@ -688,8 +692,8 @@ def run(
             List[List[List[int]]],
             List[List[List[List[int]]]],
         ],
-        transformed_rect_width: int,
-        transformed_rect_height: int,
+        transformed_rect_width: Union[int, List[int], np.ndarray],
+        transformed_rect_height: Union[int, List[int], np.ndarray],
         extend_perspective_polygon_by_detections_anchor: Union[
             sv.Position, Literal[ALL_POSITIONS]
         ],
@@ -723,23 +727,36 @@ def run(
                 raise ValueError(
                     f"Predictions batch size ({batch_size}) does not match number of perspective polygons ({largest_perspective_polygons})"
                 )
-            for polygon, detections in zip(largest_perspective_polygons, predictions):
+            if isinstance(transformed_rect_height, int):
+                transformed_rect_height = [transformed_rect_height] * batch_size
+            if isinstance(transformed_rect_width, int):
+                transformed_rect_width = [transformed_rect_width] * batch_size
+            for polygon, detections, width, height in zip(
+                largest_perspective_polygons,
+                predictions,
+                list(transformed_rect_width),
+                list(transformed_rect_height),
+            ):
                 if polygon is None:
                     self.perspective_transformers.append(None)
                     continue
                 self.perspective_transformers.append(
                     generate_transformation_matrix(
                         src_polygon=polygon,
                         detections=detections,
-                        transformed_rect_width=transformed_rect_width,
-                        transformed_rect_height=transformed_rect_height,
+                        transformed_rect_width=width,
+                        transformed_rect_height=height,
                         detections_anchor=extend_perspective_polygon_by_detections_anchor,
                     )
                 )
 
         result = []
-        for detections, perspective_transformer_w_h, image in zip(
-            predictions, self.perspective_transformers, images
+        for detections, perspective_transformer_w_h, image, width, height in zip(
+            predictions,
+            self.perspective_transformers,
+            images,
+            transformed_rect_width,
+            transformed_rect_height,
         ):
             perspective_transformer, extended_width, extended_height = (
                 perspective_transformer_w_h
@@ -751,8 +768,8 @@ def run(
                     src=image.numpy_image,
                     M=perspective_transformer,
                     dsize=(
-                        transformed_rect_width + int(round(extended_width)),
-                        transformed_rect_height + int(round(extended_height)),
+                        int(round(width)) + int(round(extended_width)),
+                        int(round(height)) + int(round(extended_height)),
                     ),
                 )
                 result_image = WorkflowImageData.copy_and_replace(
@@ -765,9 +782,9 @@ def run(
                     {
                         OUTPUT_DETECTIONS_KEY: None,
                         OUTPUT_IMAGE_KEY: result_image,
-                        OUTPUT_EXTENDED_TRANSFORMED_RECT_WIDTH_KEY: transformed_rect_width
+                        OUTPUT_EXTENDED_TRANSFORMED_RECT_WIDTH_KEY: width
                         + int(round(extended_width)),
-                        OUTPUT_EXTENDED_TRANSFORMED_RECT_HEIGHT_KEY: transformed_rect_height
+                        OUTPUT_EXTENDED_TRANSFORMED_RECT_HEIGHT_KEY: height
                         + int(round(extended_height)),
                     }
                 )
@@ -776,19 +793,17 @@ def run(
             corrected_detections = correct_detections(
                 detections=detections,
                 perspective_transformer=perspective_transformer,
-                transformed_rect_width=transformed_rect_width
-                + int(round(extended_width)),
-                transformed_rect_height=transformed_rect_height
-                + int(round(extended_height)),
+                transformed_rect_width=width + int(round(extended_width)),
+                transformed_rect_height=height + int(round(extended_height)),
             )
 
             result.append(
                 {
                     OUTPUT_DETECTIONS_KEY: corrected_detections,
                     OUTPUT_IMAGE_KEY: result_image,
-                    OUTPUT_EXTENDED_TRANSFORMED_RECT_WIDTH_KEY: transformed_rect_width
+                    OUTPUT_EXTENDED_TRANSFORMED_RECT_WIDTH_KEY: width
                     + int(round(extended_width)),
-                    OUTPUT_EXTENDED_TRANSFORMED_RECT_HEIGHT_KEY: transformed_rect_height
+                    OUTPUT_EXTENDED_TRANSFORMED_RECT_HEIGHT_KEY: height
                     + int(round(extended_height)),
                 }
            )
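
For reference, a minimal standalone sketch (not part of the diff) of the broadcasting rule this change introduces: a scalar int passed as transformed_rect_width or transformed_rect_height is repeated once per batch element before being zipped with the predictions, while lists and arrays are used as-is. The helper name normalize_rect_dimension is hypothetical and only illustrates the logic above.

# Hypothetical sketch; the helper name is illustrative, not taken from the module.
from typing import List, Union

import numpy as np


def normalize_rect_dimension(
    value: Union[int, List[int], np.ndarray], batch_size: int
) -> List[int]:
    # Scalars are broadcast to one value per batch element;
    # per-item lists/arrays are converted to a plain list and used as-is.
    if isinstance(value, int):
        return [value] * batch_size
    return list(value)


# Example: a scalar width applies to every image, heights vary per image.
widths = normalize_rect_dimension(1000, batch_size=3)   # [1000, 1000, 1000]
heights = normalize_rect_dimension([600, 700, 800], 3)  # [600, 700, 800]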

tests/workflows/unit_tests/core_steps/transformations/test_perspective_correction.py

Lines changed: 63 additions & 0 deletions
@@ -413,3 +413,66 @@ def test_warp_image():
     assert isinstance(
         result[0]["warped_image"], WorkflowImageData
     ), f"warped_image must be of type WorkflowImageData"
+
+
+def test_warp_image_batch_dims():
+    # given
+    dummy_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
+    dummy_predictions = sv.Detections(xyxy=np.array([[10, 10, 20, 20]]))
+    perspective_correction_block = PerspectiveCorrectionBlockV1()
+
+    workflow_image_data = WorkflowImageData(
+        parent_metadata=ImageParentMetadata(parent_id="test"), numpy_image=dummy_image
+    )
+
+    # when
+    result = perspective_correction_block.run(
+        images=[workflow_image_data],
+        predictions=[dummy_predictions],
+        perspective_polygons=[[[1, 1], [99, 1], [99, 99], [1, 99]]],
+        transformed_rect_width=[200],
+        transformed_rect_height=[200],
+        extend_perspective_polygon_by_detections_anchor=None,
+        warp_image=True,
+    )
+
+    # then
+    assert "warped_image" in result[0], "warped_image key must be present in the result"
+    assert isinstance(
+        result[0]["warped_image"], WorkflowImageData
+    ), f"warped_image must be of type WorkflowImageData"
+
+
+def test_batch_input():
+    # given
+    batch_size = 3
+    dummy_images = [
+        np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
+    ] * batch_size
+    dummy_predictions = [sv.Detections(xyxy=np.array([[10, 10, 20, 20]]))] * batch_size
+    perspective_correction_block = PerspectiveCorrectionBlockV1()
+
+    workflow_image_data = [
+        WorkflowImageData(
+            parent_metadata=ImageParentMetadata(parent_id="test"),
+            numpy_image=dummy_image,
+        )
+        for dummy_image in dummy_images
+    ]
+
+    # when
+    result = perspective_correction_block.run(
+        images=workflow_image_data,
+        predictions=dummy_predictions,
+        perspective_polygons=[[[1, 1], [99, 1], [99, 99], [1, 99]]],
+        transformed_rect_width=[200] * batch_size,
+        transformed_rect_height=[200] * batch_size,
+        extend_perspective_polygon_by_detections_anchor=None,
+        warp_image=True,
+    )
+
+    # then
+    assert "warped_image" in result[0], "warped_image key must be present in the result"
+    assert isinstance(
+        result[0]["warped_image"], WorkflowImageData
+    ), f"warped_image must be of type WorkflowImageData"
