
Commit 3a82a1e

added prepare_img functions (stretch_and_squish, scale_and_centercrop, scale_and_fill), added unit tests for the prepare_img functions, added opencv-contrib-python requirement
1 parent 9b4bc07 commit 3a82a1e

6 files changed, +96 -15 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
+outputs/
 
 # Translations
 *.mo

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -7,6 +7,8 @@ pyparsing==3.1.2
 PySide6==6.7.1
 transformers==4.41.2
 playsound3==2.2.1
+opencv-contrib-python==4.10.0.82
+numpy==1.26.4
 
 # PyTorch
 torch==2.2.2; platform_system != "Windows"
@@ -25,7 +27,6 @@ xformers==0.0.25.post1
 
 # InternLM-XComposer2
 auto-gptq==0.7.1; platform_system == "Linux" or platform_system == "Windows"
-numpy==1.26.4
 
 # WD Tagger
 huggingface-hub==0.23.2

taggui/run_tests.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from tests import test_image
+
+test_image.test_prepares()

taggui/tests/__init__.py

Whitespace-only changes.

taggui/tests/test_image.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import os
+
+from PIL import Image
+from PIL.Image import Resampling
+
+from utils.image import prepare_img_scale_and_centercrop, prepare_img_scale_and_fill, prepare_img_stretch_and_squish
+
+def test_prepares():
+    target_size = 1344
+    resampling = Resampling.LANCZOS
+    out_dir = "outputs/"
+
+    os.makedirs(out_dir, exist_ok=True)
+
+    for path in ["images/people_landscape.webp", "images/people_portrait.webp"]:
+        basename, ext = os.path.splitext(os.path.basename(path))
+        img = Image.open(path)
+        for name, func in [("stretch_and_squish", prepare_img_stretch_and_squish), ("scale_and_centercrop", prepare_img_scale_and_centercrop), ("scale_and_fill", prepare_img_scale_and_fill)]:
+            if name == "scale_and_fill":
+                for method in ["white", "gray", "black", "noise", "replicate", "reflect"]:
+                    ret = func(img, target_size, resampling, method)
+                    ret.save(f"{out_dir}/{basename}_{method}.webp", format='WebP', lossless=True, quality=0)
+            else:
+                ret = func(img, target_size, resampling)
+                ret.save(f"{out_dir}/{basename}_{name}.webp", format='WebP', lossless=True, quality=0)

taggui/utils/image.py

Lines changed: 65 additions & 14 deletions
@@ -1,7 +1,12 @@
+import random
+
 from dataclasses import dataclass, field
 from pathlib import Path
-from PIL import Image as PilImage, ImageColor
+
+from PIL import Image as PilImage, ImageColor, ImageOps
 from PIL.Image import Resampling
+import cv2 as opencv
+import numpy as np
 
 from PySide6.QtGui import QIcon
 
@@ -14,18 +19,36 @@ class Image:
     thumbnail: QIcon | None = None
 
 # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#filters
-def prepare_img_stretch_and_squish(img: PilImage, target_size: int, resampling=Resampling.LANCZOS) -> PilImage
+def prepare_img_stretch_and_squish(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS) -> PilImage:
     """Preprocesses an image for the model by simply stretching and squishing it to the target size. Does not retain shapes (see https://github.com/THUDM/CogVLM2/discussions/83)"""
-    return img
+    ret = pil_image.resize((target_size, target_size), resample=resample)
+    return ret
 
-def prepare_img_scale_and_centercrop(img: PilImage, target_size: int, resampling=Resampling.LANCZOS) -> PilImage:
+def prepare_img_scale_and_centercrop(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS) -> PilImage:
     """Preprocesses an image for the model by scaling the short side to target size and then center cropping a square. May crop important content especially in very rectangular images (this method was used in Stable Diffusion 1 see https://arxiv.org/abs/2112.10752)"""
-    return img
+    width, height = pil_image.size
+    if width < height:
+        new_width = target_size
+        new_height = int(target_size * height / width)
+    else:
+        new_height = target_size
+        new_width = int(target_size * width / height)
+
+    # Resize the image with the calculated dimensions
+    ret = pil_image.resize((new_width, new_height), resample=resample)
 
-def prepare_img_scale_and_fill(img: PilImage, target_size: int, resampling=Resampling.LANCZOS, method: str = "black") -> PilImage:
+    # Center crop a square from the resized image (make sure that there are no off-by-one errors)
+    left = (new_width - target_size) / 2
+    top = (new_height - target_size) / 2
+    right = left + target_size
+    bottom = top + target_size
+    ret = ret.crop((left, top, right, bottom))
+    return ret
+
+def prepare_img_scale_and_fill(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS, method: str = "black") -> PilImage:
     """
-    Preprocesses an image for the model by scaling the long side to target size and filling borders of the short side with content according to method (color, repeat, noise) until it is square. Introduces new content that wasn't there before which might be caught up by the model ("This image showcases a portrait of a person. On the left and right side are black borders.")
-    - method: can be on of "noise", "repeat" or a color value ("gray", "#000000", "rgb(100%,100%,100%)" etc.) which can be interpreted by Pillow (see https://pillow.readthedocs.io/en/stable/reference/ImageColor.html and https://developer.mozilla.org/en-US/docs/Web/CSS/named-color)
+    Preprocesses an image for the model by scaling the long side to target size and filling the borders of the short side with content according to method (color, noise, replicate, reflect) until it is square. Introduces new content that wasn't there before, which might be picked up by the model ("This image showcases a portrait of a person. On the left and right side are black borders.")
+    - method: can be one of "noise", "replicate", "reflect" or a color value ("gray", "#000000", "rgb(100%,100%,100%)" etc.) which can be interpreted by Pillow (see https://pillow.readthedocs.io/en/stable/reference/ImageColor.html and https://developer.mozilla.org/en-US/docs/Web/CSS/named-color)
     """
     color = None
     try:
@@ -34,10 +57,38 @@ def prepare_img_scale_and_fill(img: PilImage, target_size: int, resampling=Resam
     except ValueError:
         pass
 
-    match method:
-        case "color": pass # fill borders with color
-        case "noise": pass # fill borders with RGB noise
-        case "repeat": pass # fill borders with color value of the edge
-        case _:
+    width, height = pil_image.size
+    if width > height:
+        new_width = target_size
+        new_height = int((new_width / width) * height)
+    else:
+        new_height = target_size
+        new_width = int((new_height / height) * width)
+
+    pastee = pil_image.resize((new_width, new_height), resample=resample)
+
+    if color is not None: # fill borders with color
+        canvas = PilImage.new("RGB", (target_size, target_size), color)
+        offset = ((target_size - new_width) // 2, (target_size - new_height) // 2)
+        canvas.paste(pastee, offset)
+        ret = canvas
+    elif method == "noise": # fill borders with RGB noise
+        canvas = PilImage.new("RGB", (target_size, target_size))
+        for x in range(target_size):
+            for y in range(target_size):
+                canvas.putpixel((x, y), (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
+        canvas.paste(pastee, ((target_size - new_width) // 2, (target_size - new_height) // 2))
+        ret = canvas
+    elif method in ("replicate", "reflect"): # fill borders with color value of the edge
+        left_padding = int((target_size - new_width) / 2)
+        top_padding = int((target_size - new_height) / 2)
+        right_padding = target_size - new_width - left_padding
+        bottom_padding = target_size - new_height - top_padding
+        opencv_pastee = np.array(pastee)
+        borderType = { "replicate": opencv.BORDER_REPLICATE, "reflect": opencv.BORDER_REFLECT }[method]
+        opencv_ret = opencv.copyMakeBorder(opencv_pastee, top_padding, bottom_padding, left_padding, right_padding, borderType=borderType)
+        ret = PilImage.fromarray(opencv_ret)
+    else:
+        raise ValueError(f"Invalid method='{method}'")
 
-    return img
+    return ret
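For reference, the border arithmetic in the replicate/reflect branch can be checked with a small worked example (a standalone sketch with made-up dimensions, not taken from the commit):

# A 1600x900 landscape image padded to a 1344x1344 square.
target_size = 1344
width, height = 1600, 900

# The long side (width) is scaled to target_size; the short side scales proportionally.
new_width = target_size                           # 1344
new_height = int((new_width / width) * height)    # int(756.0) = 756

# The remaining 588 rows are split between the top and bottom borders;
# any odd remainder goes to the bottom/right, which take whatever is left after flooring.
top_padding = int((target_size - new_height) / 2)         # 294
bottom_padding = target_size - new_height - top_padding   # 294
left_padding = int((target_size - new_width) / 2)         # 0
right_padding = target_size - new_width - left_padding    # 0

assert new_height + top_padding + bottom_padding == target_size
assert new_width + left_padding + right_padding == target_size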
