Skip to content

Commit

Permalink
Put back Letterbox transform (#212)
Browse files Browse the repository at this point in the history
* put back letterbox padding

* use ultralytics bg

* order output

* fix order

* clip before mask

* adapt test

* useless import

* fix mypy

* fix docstring
  • Loading branch information
MateoLostanlen authored Jun 23, 2024
1 parent 4c7aebf commit 0a1ae7c
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 22 deletions.
48 changes: 47 additions & 1 deletion pyroengine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


import cv2 # type: ignore[import-untyped]
import numpy as np
from tqdm import tqdm # type: ignore[import-untyped]

__all__ = ["nms", "xywh2xyxy", "DownloadProgressBar"]
__all__ = ["nms", "xywh2xyxy", "DownloadProgressBar", "letterbox"]


def xywh2xyxy(x: np.ndarray):
Expand All @@ -19,6 +20,51 @@ def xywh2xyxy(x: np.ndarray):
return y


def letterbox(
    im: np.ndarray, new_shape: tuple = (640, 640), color: tuple = (114, 114, 114), auto: bool = False, stride: int = 32
):
    """Letterbox image transform for yolo models

    The image is resized to fit inside ``new_shape`` while keeping its aspect ratio, and the
    remaining area is filled with ``color``.

    Args:
        im (np.ndarray): Input image, (H, W, 3) or single-channel (H, W) / (H, W, 1).
        new_shape (tuple, optional): Target size as (height, width). An int is interpreted as a
            square size. Defaults to (640, 640).
        color (tuple, optional): Pixel fill value for the area outside the transformed image.
            Defaults to (114, 114, 114).
        auto (bool, optional): If True, only the remainder of the padding modulo ``stride`` is kept
            (minimum rectangle) instead of padding up to ``new_shape``. Defaults to False.
        stride (int, optional): padding stride, only used when ``auto`` is True. Defaults to 32.

    Returns:
        tuple: ``(image, (left, top))`` where ``image`` is the padded uint8 array with 3 channels
        and ``(left, top)`` is the padding, in pixels, added on the left and top sides.
    """
    # Resize and pad image while meeting stride-multiple constraints
    im = np.array(im)
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old): the most constraining side decides
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height), as cv2 expects
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # cv2.resize returns a 2-D array for single-channel input; give it a channel axis so that it
    # broadcasts over the 3 output channels instead of failing on assignment.
    if im.ndim == 2:
        im = im[:, :, None]

    # The +/- 0.1 sends the extra pixel to the bottom/right side when the total padding is odd
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

    # add border: allocate the canvas directly as uint8 rather than going through float64
    h, w = im.shape[:2]
    im_b = np.full((h + top + bottom, w + left + right, 3), color, dtype=np.uint8)
    im_b[top : top + h, left : left + w, :] = im

    return im_b, (left, top)


def box_iou(box1: np.ndarray, box2: np.ndarray, eps: float = 1e-7):
"""
Calculate intersection-over-union (IoU) of boxes.
Expand Down
33 changes: 16 additions & 17 deletions pyroengine/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@
from typing import Optional, Tuple
from urllib.request import urlretrieve

import cv2 # type: ignore[import-untyped]
import numpy as np
import onnxruntime
from huggingface_hub import HfApi # type: ignore[import-untyped]
from PIL import Image

from .utils import DownloadProgressBar, nms, xywh2xyxy
from .utils import DownloadProgressBar, letterbox, nms, xywh2xyxy

__all__ = ["Classifier"]

Expand All @@ -41,7 +40,7 @@ class Classifier:
model_path: model path
"""

def __init__(self, model_path: Optional[str] = "data/model.onnx", base_img_size: int = 640) -> None:
def __init__(self, model_path: Optional[str] = "data/model.onnx", img_size: tuple = (640, 640)) -> None:
if model_path is None:
model_path = "data/model.onnx"

Expand All @@ -67,7 +66,7 @@ def __init__(self, model_path: Optional[str] = "data/model.onnx", base_img_size:
self.download_model(model_path, expected_sha256)

self.ort_session = onnxruntime.InferenceSession(model_path)
self.base_img_size = base_img_size
self.img_size = img_size

def get_sha(self, siblings):
# Extract the SHA256 hash from the model files metadata
Expand Down Expand Up @@ -99,7 +98,7 @@ def load_metadata(self, metadata_path):
return json.load(f)
return None

def preprocess_image(self, pil_img: Image.Image, new_img_size: list) -> Tuple[np.ndarray, Tuple[int, int]]:
def preprocess_image(self, pil_img: Image.Image) -> Tuple[np.ndarray, Tuple[int, int]]:
"""Preprocess an image for inference
Args:
Expand All @@ -111,20 +110,15 @@ def preprocess_image(self, pil_img: Image.Image, new_img_size: list) -> Tuple[np
- Padding information as a tuple of integers (pad_height, pad_width).
"""

np_img = cv2.resize(np.array(pil_img), new_img_size, interpolation=cv2.INTER_LINEAR)
np_img, pad = letterbox(np.array(pil_img), self.img_size) # Applies letterbox resize with padding
np_img = np.expand_dims(np_img.astype("float"), axis=0) # Add batch dimension
np_img = np.ascontiguousarray(np_img.transpose((0, 3, 1, 2))) # Convert from BHWC to BCHW format
np_img = np_img.astype("float32") / 255 # Normalize to [0, 1]

return np_img
return np_img, pad

def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] = None) -> np.ndarray:

w, h = pil_img.size
ratio = self.base_img_size / max(w, h)
new_img_size = [int(ratio * w), int(ratio * h)]
new_img_size = [x - x % 32 for x in new_img_size] # size need to be a multiple of 32 to fit the model
np_img = self.preprocess_image(pil_img, new_img_size)
np_img, pad = self.preprocess_image(pil_img)

# ONNX inference
y = self.ort_session.run(["output0"], {"images": np_img})[0][0]
Expand All @@ -136,12 +130,17 @@ def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] =
# Sort by confidence
y = y[y[:, 4].argsort()]
y = nms(y)
y = y[::-1]

# Normalize preds
if len(y) > 0:
# Normalize Output
y[:, :4:2] /= new_img_size[0]
y[:, 1:4:2] /= new_img_size[1]
# Remove padding
left_pad, top_pad = pad
y[:, :4:2] -= left_pad
y[:, 1:4:2] -= top_pad
y[:, :4:2] /= self.img_size[1] - 2 * left_pad
y[:, 1:4:2] /= self.img_size[0] - 2 * top_pad
y = np.clip(y, 0, 1)
else:
y = np.zeros((0, 5)) # normalize output

Expand All @@ -162,4 +161,4 @@ def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] =

y = y[keep]

return np.clip(y, 0, 1)
return y
11 changes: 7 additions & 4 deletions tests/test_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,25 @@ def test_classifier(mock_wildfire_image):
# Instantiate the ONNX model
model = Classifier()
# Check preprocessing
out = model.preprocess_image(mock_wildfire_image, (640, 384))
out, pad = model.preprocess_image(mock_wildfire_image)
assert isinstance(out, np.ndarray) and out.dtype == np.float32
assert out.shape == (1, 3, 384, 640)
assert out.shape == (1, 3, 640, 640)
assert isinstance(pad, tuple)
# Check inference
out = model(mock_wildfire_image)
assert out.shape == (1, 5)
conf = np.max(out[:, 4])
assert conf >= 0 and conf <= 1

# Test mask
mask = np.ones((640, 384))
mask = np.ones((384, 640))
out = model(mock_wildfire_image, mask)
print(out)
assert out.shape == (1, 5)

mask = np.zeros((640, 384))
mask = np.zeros((384, 640))
out = model(mock_wildfire_image, mask)
print(out)
assert out.shape == (0, 5)
os.remove(model_path)
os.remove(METADATA_PATH)
Expand Down

0 comments on commit 0a1ae7c

Please sign in to comment.