from typing import List, Optional, Tuple, Union

import easyocr
import numpy as np
import torch
from inference_exp import Detections, StructuredOCRModel
from inference_exp.configuration import DEFAULT_DEVICE
from inference_exp.entities import ColorFormat, ImageDimensions
from inference_exp.errors import CorruptedModelPackageError, ModelRuntimeError
from inference_exp.models.common.model_packages import get_model_package_contents
from inference_exp.utils.file_system import read_json
from pydantic import BaseModel

Point = Tuple[int, int]
Coordinates = Tuple[Point, Point, Point, Point]
DetectedText = str
Confidence = float
EasyOCRRawPrediction = Tuple[Coordinates, DetectedText, Confidence]
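# A single raw prediction, as returned by easyocr.Reader.readtext(), pairs the
# four corner points of a detected text quadrilateral with the recognized text
# and a confidence score - e.g. (values illustrative):
#   ([(10, 12), (88, 12), (88, 40), (10, 40)], "STOP", 0.97)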


RECOGNIZED_DETECTORS = {"craft", "dbnet18", "dbnet50"}


class EasyOcrConfig(BaseModel):
    lang_list: List[str]
    detector_model_file_name: str
    recognition_model_file_name: str
    detect_network: str
    recognition_network: str

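# An illustrative `easy-ocr-config.json` that this schema would accept; the
# file names and network identifiers below are assumptions for the sake of the
# example, not a canonical package layout:
#
#   {
#       "lang_list": ["en"],
#       "detector_model_file_name": "craft_mlt_25k.pth",
#       "recognition_model_file_name": "english_g2.pth",
#       "detect_network": "craft",
#       "recognition_network": "english_g2"
#   }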

class EasyOCRTorch(
    StructuredOCRModel[List[np.ndarray], ImageDimensions, EasyOCRRawPrediction]
):

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path: str,
        device: torch.device = DEFAULT_DEVICE,
        **kwargs,
    ) -> "StructuredOCRModel":
        package_contents = get_model_package_contents(
            model_package_dir=model_name_or_path, elements=["easy-ocr-config.json"]
        )
        config = parse_easy_ocr_config(
            config_path=package_contents["easy-ocr-config.json"]
        )
        device_string = device.type
        # compare against None explicitly - a device index of 0 is valid but falsy
        if device.type == "cuda" and device.index is not None:
            device_string = f"{device_string}:{device.index}"
        try:
            model = easyocr.Reader(
                config.lang_list,
                download_enabled=False,
                model_storage_directory=model_name_or_path,
                user_network_directory=model_name_or_path,
                detect_network=config.detect_network,
                recog_network=config.recognition_network,
                detector=True,
                recognizer=True,
                gpu=device_string,
            )
        except Exception as error:
            raise CorruptedModelPackageError(
                message=f"EasyOCR model package is broken - could not initialize the EasyOCR reader "
                f"from the package contents. Error: {error}. "
                f"If you attempt to run `inference-exp` locally - inspect the contents of the local directory "
                f"to verify the model package. If you run the model on the Roboflow platform - contact us.",
                help_url="https://todo",
            ) from error
        return cls(model=model, device=device)

    def __init__(
        self,
        model: easyocr.Reader,
        device: torch.device,
    ):
        self._model = model
        self._device = device

    @property
    def class_names(self) -> List[str]:
        return ["text-region"]

    def pre_process(
        self,
        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
        input_color_format: Optional[ColorFormat] = None,
        **kwargs,
    ) -> Tuple[List[np.ndarray], List[ImageDimensions]]:
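        # Input handling, summarized: a bare np.ndarray (HWC) is assumed BGR and
        # a torch.Tensor (CHW or NCHW) is assumed RGB unless `input_color_format`
        # says otherwise; lists of either are handled element-wise. Every branch
        # returns BGR HWC numpy arrays plus the original image dimensions, which
        # is the layout EasyOCR consumes.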
        if isinstance(images, np.ndarray):
            input_color_format = input_color_format or "bgr"
            if input_color_format != "bgr":
                images = images[:, :, ::-1]
            h, w = images.shape[:2]
            return [images], [ImageDimensions(height=h, width=w)]
        if isinstance(images, torch.Tensor):
            input_color_format = input_color_format or "rgb"
            if len(images.shape) == 3:
                images = torch.unsqueeze(images, dim=0)
            if input_color_format != "bgr":
                images = images[:, [2, 1, 0], :, :]
            result = []
            dimensions = []
            for image in images:
                np_image = image.permute(1, 2, 0).cpu().numpy()
                result.append(np_image)
                dimensions.append(
                    ImageDimensions(height=np_image.shape[0], width=np_image.shape[1])
                )
            return result, dimensions
        if not isinstance(images, list):
            raise ModelRuntimeError(
                message="Pre-processing supports only np.ndarray, torch.Tensor, or a list of either.",
                help_url="https://todo",
            )
        if not len(images):
            raise ModelRuntimeError(
                message="Detected empty input to the model.", help_url="https://todo"
            )
        if isinstance(images[0], np.ndarray):
            input_color_format = input_color_format or "bgr"
            if input_color_format != "bgr":
                images = [i[:, :, ::-1] for i in images]
            dimensions = [
                ImageDimensions(height=i.shape[0], width=i.shape[1]) for i in images
            ]
            return images, dimensions
        if isinstance(images[0], torch.Tensor):
            result = []
            dimensions = []
            input_color_format = input_color_format or "rgb"
            for image in images:
                if input_color_format != "bgr":
                    image = image[[2, 1, 0], :, :]
                np_image = image.permute(1, 2, 0).cpu().numpy()
                result.append(np_image)
                dimensions.append(
                    ImageDimensions(height=np_image.shape[0], width=np_image.shape[1])
                )
            return result, dimensions
        raise ModelRuntimeError(
            message=f"Detected unknown input batch element: {type(images[0])}",
            help_url="https://todo",
        )

    def forward(
        self, pre_processed_images: List[np.ndarray], **kwargs
    ) -> List[List[EasyOCRRawPrediction]]:
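        # readtext() yields (quad corner points, text, confidence) tuples per
        # image; numpy scalars are converted to plain Python numbers below so
        # the results serialize cleanly. For a two-image batch, the returned
        # structure would look like this (values illustrative):
        #   [
        #       [([[10, 12], [88, 12], [88, 40], [10, 40]], "STOP", 0.97)],
        #       [],  # no text detected on the second image
        #   ]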
        all_results = []
        for image in pre_processed_images:
            image_results_raw = self._model.readtext(image)
            image_results_parsed = [
                (
                    [
                        [x.item() if not isinstance(x, (int, float)) else x for x in c]
                        for c in res[0]
                    ],
                    res[1],
                    res[2].item() if not isinstance(res[2], (int, float)) else res[2],
                )
                for res in image_results_raw
            ]
            all_results.append(image_results_parsed)
        return all_results

    def post_process(
        self,
        model_results: List[List[EasyOCRRawPrediction]],
        pre_processing_meta: List[ImageDimensions],
        confidence_threshold: float = 0.3,
        text_regions_separator: str = " ",
        **kwargs,
    ) -> Tuple[List[str], List[Detections]]:
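        # Each quadrilateral is collapsed to its axis-aligned bounding box. For
        # the sample forward() output sketched above, with the default separator,
        # this would produce (values illustrative):
        #   rendered_texts == ["STOP", ""]
        #   all_detections[0].xyxy == tensor([[10, 12, 88, 40]])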
        rendered_texts, all_detections = [], []
        for single_image_result, original_dimensions in zip(
            model_results, pre_processing_meta
        ):
            whole_image_text = []
            xyxy = []
            confidence = []
            class_id = []
            for box, text, text_confidence in single_image_result:
                if text_confidence < confidence_threshold:
                    continue
                whole_image_text.append(text)
                min_x = min(p[0] for p in box)
                min_y = min(p[1] for p in box)
                max_x = max(p[0] for p in box)
                max_y = max(p[1] for p in box)
                box_xyxy = [min_x, min_y, max_x, max_y]
                xyxy.append(box_xyxy)
                confidence.append(float(text_confidence))
                class_id.append(0)
            whole_image_text_joined = text_regions_separator.join(whole_image_text)
            rendered_texts.append(whole_image_text_joined)
            data = [{"text": text} for text in whole_image_text]
            all_detections.append(
                Detections(
                    # reshape keeps an empty result at shape (0, 4) instead of (0,)
                    xyxy=torch.tensor(xyxy, device=self._device).reshape(-1, 4),
                    class_id=torch.tensor(class_id, device=self._device),
                    confidence=torch.tensor(confidence, device=self._device),
                    bboxes_metadata=data,
                )
            )
        return rendered_texts, all_detections


def parse_easy_ocr_config(config_path: str) -> EasyOcrConfig:
    try:
        raw_config = read_json(config_path)
        return EasyOcrConfig.model_validate(raw_config)
    except Exception as error:
        raise CorruptedModelPackageError(
            message=f"EasyOCR model package is broken - could not parse model config file. Error: {error}. "
            f"If you attempt to run `inference-exp` locally - inspect the contents of the local directory "
            f"to verify the model package config file. If you run the model on the Roboflow platform - "
            f"contact us.",
            help_url="https://todo",
        ) from error
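

# Minimal end-to-end usage sketch. It assumes a local model package directory
# containing `easy-ocr-config.json` plus the weight files it references, and it
# calls the three processing stages explicitly rather than relying on whatever
# convenience entrypoint `StructuredOCRModel` may expose; paths are illustrative:
#
#   import cv2
#
#   model = EasyOCRTorch.from_pretrained("./easy-ocr-model-package")
#   image = cv2.imread("./receipt.jpg")  # HWC BGR, the default for np.ndarray
#   pre_processed, dimensions = model.pre_process(image)
#   raw_predictions = model.forward(pre_processed)
#   texts, detections = model.post_process(raw_predictions, dimensions)
#   print(texts[0])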