
Commit 3a82a1e

added prepare_img functions (stretch_and_squish, scale_and_centercrop, scale_and_fill), added unit tests for the prepare_img functions, added opencv-contrib-python requirement
1 parent 9b4bc07 commit 3a82a1e

6 files changed, +96 -15 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
+outputs/
 
 # Translations
 *.mo

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -7,6 +7,8 @@ pyparsing==3.1.2
 PySide6==6.7.1
 transformers==4.41.2
 playsound3==2.2.1
+opencv-contrib-python==4.10.0.82
+numpy==1.26.4
 
 # PyTorch
 torch==2.2.2; platform_system != "Windows"
@@ -25,7 +27,6 @@ xformers==0.0.25.post1
 
 # InternLM-XComposer2
 auto-gptq==0.7.1; platform_system == "Linux" or platform_system == "Windows"
-numpy==1.26.4
 
 # WD Tagger
 huggingface-hub==0.23.2

taggui/run_tests.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from tests import test_image
+
+test_image.test_prepares()

taggui/tests/__init__.py

Whitespace-only changes.

taggui/tests/test_image.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import os
+
+from PIL import Image
+from PIL.Image import Resampling
+
+from utils.image import prepare_img_scale_and_centercrop, prepare_img_scale_and_fill, prepare_img_stretch_and_squish
+
+def test_prepares():
+    target_size = 1344
+    resampling = Resampling.LANCZOS
+    out_dir = "outputs/"
+
+    os.makedirs(out_dir, exist_ok=True)
+
+    for path in ["images/people_landscape.webp", "images/people_portrait.webp"]:
+        basename, ext = os.path.splitext(os.path.basename(path))
+        img = Image.open(path)
+        for name, func in [("stretch_and_squish", prepare_img_stretch_and_squish), ("scale_and_centercrop", prepare_img_scale_and_centercrop), ("scale_and_fill", prepare_img_scale_and_fill)]:
+            if name == "scale_and_fill":
+                for method in ["white", "gray", "black", "noise", "replicate", "reflect"]:
+                    ret = func(img, target_size, resampling, method)
+                    ret.save(f"{out_dir}/{basename}_{method}.webp", format='WebP', lossless=True, quality=0)
+            else:
+                ret = func(img, target_size, resampling)
+                ret.save(f"{out_dir}/{basename}_{name}.webp", format='WebP', lossless=True, quality=0)

taggui/utils/image.py

Lines changed: 65 additions & 14 deletions
@@ -1,7 +1,12 @@
+import random
+
 from dataclasses import dataclass, field
 from pathlib import Path
-from PIL import Image as PilImage, ImageColor
+
+from PIL import Image as PilImage, ImageColor, ImageOps
 from PIL.Image import Resampling
+import cv2 as opencv
+import numpy as np
 
 from PySide6.QtGui import QIcon
 
@@ -14,18 +19,36 @@ class Image:
     thumbnail: QIcon | None = None
 
 # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#filters
-def prepare_img_stretch_and_squish(img: PilImage, target_size: int, resampling=Resampling.LANCZOS) -> PilImage
+def prepare_img_stretch_and_squish(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS) -> PilImage:
     """Preprocesses an image for the model by simply stretching and squishing it to the target size. Does not retain shapes (see https://github.com/THUDM/CogVLM2/discussions/83)"""
-    return img
+    ret = pil_image.resize((target_size, target_size), resample=resample)
+    return ret
 
-def prepare_img_scale_and_centercrop(img: PilImage, target_size: int, resampling=Resampling.LANCZOS) -> PilImage:
+def prepare_img_scale_and_centercrop(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS) -> PilImage:
     """Preprocesses an image for the model by scaling the short side to target size and then center cropping a square. May crop important content especially in very rectangular images (this method was used in Stable Diffusion 1 see https://arxiv.org/abs/2112.10752)"""
-    return img
+    width, height = pil_image.size
+    if width < height:
+        new_width = target_size
+        new_height = int(target_size * height / width)
+    else:
+        new_height = target_size
+        new_width = int(target_size * width / height)
+
+    # Resize the image with the calculated dimensions
+    ret = pil_image.resize((new_width, new_height), resample=resample)
 
-def prepare_img_scale_and_fill(img: PilImage, target_size: int, resampling=Resampling.LANCZOS, method: str = "black") -> PilImage:
+    # Center crop a square from the resized image (make sure that there are no off-by-one errors)
+    left = (new_width - target_size) / 2
+    top = (new_height - target_size) / 2
+    right = left + target_size
+    bottom = top + target_size
+    ret = ret.crop((left, top, right, bottom))
+    return ret
+
+def prepare_img_scale_and_fill(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS, method: str = "black") -> PilImage:
     """
-    Preprocesses an image for the model by scaling the long side to target size and filling borders of the short side with content according to method (color, repeat, noise) until it is square. Introduces new content that wasn't there before which might be caught up by the model ("This image showcases a portrait of a person. On the left and right side are black borders.")
-    - method: can be on of "noise", "repeat" or a color value ("gray", "#000000", "rgb(100%,100%,100%)" etc.) which can be interpreted by Pillow (see https://pillow.readthedocs.io/en/stable/reference/ImageColor.html and https://developer.mozilla.org/en-US/docs/Web/CSS/named-color)
+    Preprocesses an image for the model by scaling the long side to target size and filling the borders of the short side with content according to method (color, noise, replicate, reflect) until it is square. Introduces new content that wasn't there before, which might be picked up by the model ("This image showcases a portrait of a person. On the left and right side are black borders.")
+    - method: can be one of "noise", "replicate", "reflect" or a color value ("gray", "#000000", "rgb(100%,100%,100%)" etc.) which can be interpreted by Pillow (see https://pillow.readthedocs.io/en/stable/reference/ImageColor.html and https://developer.mozilla.org/en-US/docs/Web/CSS/named-color)
     """
     color = None
     try:
@@ -34,10 +57,38 @@ def prepare_img_scale_and_fill(img: PilImage, target_size: int, resampling=Resam
     except ValueError:
         pass
 
-    match method:
-        case "color": pass # fill borders with color
-        case "noise": pass # fill borders with RGB noise
-        case "repeat": pass # fill borders with color value of the edge
-        case _:
+    width, height = pil_image.size
+    if width > height:
+        new_width = target_size
+        new_height = int((new_width / width) * height)
+    else:
+        new_height = target_size
+        new_width = int((new_height / height) * width)
+
+    pastee = pil_image.resize((new_width, new_height), resample=resample)
+
+    if color is not None: # fill borders with color
+        canvas = PilImage.new("RGB", (target_size, target_size), color)
+        offset = ((target_size - new_width) // 2, (target_size - new_height) // 2)
+        canvas.paste(pastee, offset)
+        ret = canvas
+    elif method == "noise": # fill borders with RGB noise
+        canvas = PilImage.new("RGB", (target_size, target_size))
+        for x in range(target_size):
+            for y in range(target_size):
+                canvas.putpixel((x, y), (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
+        canvas.paste(pastee, ((target_size - new_width) // 2, (target_size - new_height) // 2))
+        ret = canvas
+    elif method in ("replicate", "reflect"): # fill borders with color value of the edge
+        left_padding = int((target_size - new_width) / 2)
+        top_padding = int((target_size - new_height) / 2)
+        right_padding = target_size - new_width - left_padding
+        bottom_padding = target_size - new_height - top_padding
+        opencv_pastee = np.array(pastee)
+        borderType = { "replicate": opencv.BORDER_REPLICATE, "reflect": opencv.BORDER_REFLECT }[method]
+        opencv_ret = opencv.copyMakeBorder(opencv_pastee, top_padding, bottom_padding, left_padding, right_padding, borderType=borderType)
+        ret = PilImage.fromarray(opencv_ret)
+    else:
+        raise ValueError(f"Invalid method='{method}'")
 
-    return img
+    return ret
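For reference, the border arithmetic in the replicate/reflect branch can be checked with a small worked example (a standalone sketch with made-up dimensions, not taken from the commit):

# A 1600x900 landscape image padded to a 1344x1344 square.
target_size = 1344
width, height = 1600, 900

# The long side (width) is scaled to target_size; the short side scales proportionally.
new_width = target_size                           # 1344
new_height = int((new_width / width) * height)    # int(756.0) = 756

# The remaining 588 rows are split between the top and bottom borders;
# any odd remainder goes to the bottom/right, which take whatever is left after flooring.
top_padding = int((target_size - new_height) / 2)         # 294
bottom_padding = target_size - new_height - top_padding   # 294
left_padding = int((target_size - new_width) / 2)         # 0
right_padding = target_size - new_width - left_padding    # 0

assert new_height + top_padding + bottom_padding == target_size
assert new_width + left_padding + right_padding == target_size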
