diff --git a/pyroengine/utils.py b/pyroengine/utils.py
index 1629bb8..ba68d4a 100644
--- a/pyroengine/utils.py
+++ b/pyroengine/utils.py
@@ -4,10 +4,11 @@
 # See LICENSE or go to for full license details.
 
+import cv2  # type: ignore[import-untyped]
 import numpy as np
 from tqdm import tqdm  # type: ignore[import-untyped]
 
-__all__ = ["nms", "xywh2xyxy", "DownloadProgressBar"]
+__all__ = ["nms", "xywh2xyxy", "DownloadProgressBar", "letterbox"]
 
 
 def xywh2xyxy(x: np.ndarray):
@@ -19,6 +20,51 @@ def xywh2xyxy(x: np.ndarray):
     return y
 
 
+def letterbox(
+    im: np.ndarray, new_shape: tuple = (640, 640), color: tuple = (114, 114, 114), auto: bool = False, stride: int = 32
+):
+    """Letterbox image transform for YOLO models
+
+    Args:
+        im (np.ndarray): Input image
+        new_shape (tuple, optional): Image size. Defaults to (640, 640).
+        color (tuple, optional): Pixel fill value for the area outside the transformed image.
+            Defaults to (114, 114, 114).
+        auto (bool, optional): minimum-rectangle (auto) padding. Defaults to False.
+        stride (int, optional): padding stride. Defaults to 32.
+
+    Returns:
+        Tuple[np.ndarray, Tuple[int, int]]: Output image and the (left, top) padding applied
+    """
+    # Resize and pad image while meeting stride-multiple constraints
+    im = np.array(im)
+    shape = im.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+
+    # Compute padding
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    # add border
+    h, w = im.shape[:2]
+    im_b = np.zeros((h + top + bottom, w + left + right, 3)) + color
+    im_b[top : top + h, left : left + w, :] = im
+
+    return im_b.astype("uint8"), (left, top)
+
+
 def box_iou(box1: np.ndarray, box2: np.ndarray, eps: float = 1e-7):
     """
     Calculate intersection-over-union (IoU) of boxes.
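For reviewers, a minimal sketch of what the new letterbox returns, using a hypothetical 720x1280 frame (sizes chosen only for illustration):

    import numpy as np
    from pyroengine.utils import letterbox

    im = np.zeros((720, 1280, 3), dtype=np.uint8)  # hypothetical HxWxC frame
    out, (left, top) = letterbox(im, (640, 640))
    # r = min(640/720, 640/1280) = 0.5, so the content is resized to 640x360
    # and 140 px of gray (114) padding is added on top and bottom.
    assert out.shape == (640, 640, 3) and out.dtype == np.uint8
    assert (left, top) == (0, 140)

Returning (left, top) alongside the image is what lets Classifier.__call__ map boxes back to the original frame, as the vision.py changes below rely on.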
diff --git a/pyroengine/vision.py b/pyroengine/vision.py
index dee65a5..9bac5dc 100644
--- a/pyroengine/vision.py
+++ b/pyroengine/vision.py
@@ -8,13 +8,12 @@
 from typing import Optional, Tuple
 from urllib.request import urlretrieve
 
-import cv2  # type: ignore[import-untyped]
 import numpy as np
 import onnxruntime
 from huggingface_hub import HfApi  # type: ignore[import-untyped]
 from PIL import Image
 
-from .utils import DownloadProgressBar, nms, xywh2xyxy
+from .utils import DownloadProgressBar, letterbox, nms, xywh2xyxy
 
 __all__ = ["Classifier"]
 
@@ -41,7 +40,7 @@ class Classifier:
         model_path: model path
     """
 
-    def __init__(self, model_path: Optional[str] = "data/model.onnx", base_img_size: int = 640) -> None:
+    def __init__(self, model_path: Optional[str] = "data/model.onnx", img_size: tuple = (640, 640)) -> None:
         if model_path is None:
             model_path = "data/model.onnx"
 
@@ -67,7 +66,7 @@ def __init__(self, model_path: Optional[str] = "data/model.onnx", base_img_size:
             self.download_model(model_path, expected_sha256)
 
         self.ort_session = onnxruntime.InferenceSession(model_path)
-        self.base_img_size = base_img_size
+        self.img_size = img_size
 
     def get_sha(self, siblings):
         # Extract the SHA256 hash from the model files metadata
@@ -99,7 +98,7 @@ def load_metadata(self, metadata_path):
             return json.load(f)
         return None
 
-    def preprocess_image(self, pil_img: Image.Image, new_img_size: list) -> Tuple[np.ndarray, Tuple[int, int]]:
+    def preprocess_image(self, pil_img: Image.Image) -> Tuple[np.ndarray, Tuple[int, int]]:
         """Preprocess an image for inference
 
         Args:
@@ -111,20 +110,15 @@ def preprocess_image(self, pil_img: Image.Image, new_img_size: list) -> Tuple[np
             - Padding information as a tuple of integers (pad_height, pad_width).
         """
 
-        np_img = cv2.resize(np.array(pil_img), new_img_size, interpolation=cv2.INTER_LINEAR)
+        np_img, pad = letterbox(np.array(pil_img), self.img_size)  # Applies letterbox resize with padding
         np_img = np.expand_dims(np_img.astype("float"), axis=0)  # Add batch dimension
         np_img = np.ascontiguousarray(np_img.transpose((0, 3, 1, 2)))  # Convert from BHWC to BCHW format
         np_img = np_img.astype("float32") / 255  # Normalize to [0, 1]
 
-        return np_img
+        return np_img, pad
 
     def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] = None) -> np.ndarray:
-
-        w, h = pil_img.size
-        ratio = self.base_img_size / max(w, h)
-        new_img_size = [int(ratio * w), int(ratio * h)]
-        new_img_size = [x - x % 32 for x in new_img_size]  # size need to be a multiple of 32 to fit the model
-        np_img = self.preprocess_image(pil_img, new_img_size)
+        np_img, pad = self.preprocess_image(pil_img)
 
         # ONNX inference
         y = self.ort_session.run(["output0"], {"images": np_img})[0][0]
@@ -136,12 +130,17 @@ def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] =
         # Sort by confidence
         y = y[y[:, 4].argsort()]
         y = nms(y)
+        y = y[::-1]
 
         # Normalize preds
         if len(y) > 0:
-            # Normalize Output
-            y[:, :4:2] /= new_img_size[0]
-            y[:, 1:4:2] /= new_img_size[1]
+            # Remove padding
+            left_pad, top_pad = pad
+            y[:, :4:2] -= left_pad
+            y[:, 1:4:2] -= top_pad
+            y[:, :4:2] /= self.img_size[1] - 2 * left_pad
+            y[:, 1:4:2] /= self.img_size[0] - 2 * top_pad
+            y = np.clip(y, 0, 1)
         else:
             y = np.zeros((0, 5))  # normalize output
 
@@ -162,4 +161,4 @@ def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] =
 
         y = y[keep]
 
-        return np.clip(y, 0, 1)
+        return y
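To make the new normalization step concrete, here is the 720x1280 example from above worked through the un-padding arithmetic in __call__ (box values hypothetical):

    import numpy as np

    img_size = (640, 640)  # (height, width), as stored on the Classifier
    left_pad, top_pad = 0, 140  # pad returned by letterbox for a 720x1280 frame
    y = np.array([[100.0, 200.0, 300.0, 400.0, 0.9]])  # xyxy + conf, letterbox space

    y[:, :4:2] -= left_pad  # undo horizontal padding on x1, x2
    y[:, 1:4:2] -= top_pad  # undo vertical padding on y1, y2
    y[:, :4:2] /= img_size[1] - 2 * left_pad  # 640 px of un-padded width
    y[:, 1:4:2] /= img_size[0] - 2 * top_pad  # 360 px of un-padded height
    y = np.clip(y, 0, 1)
    # x1, x2 -> 100/640, 300/640; y1, y2 -> (200-140)/360, (400-140)/360

Note also that the predictions are sorted ascending by confidence before nms, so the added y = y[::-1] appears intended to put the highest-confidence detection first.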
diff --git a/tests/test_vision.py b/tests/test_vision.py
index 894bb83..98a378f 100644
--- a/tests/test_vision.py
+++ b/tests/test_vision.py
@@ -30,9 +30,10 @@ def test_classifier(mock_wildfire_image):
     # Instantiate the ONNX model
     model = Classifier()
     # Check preprocessing
-    out = model.preprocess_image(mock_wildfire_image, (640, 384))
+    out, pad = model.preprocess_image(mock_wildfire_image)
     assert isinstance(out, np.ndarray) and out.dtype == np.float32
-    assert out.shape == (1, 3, 384, 640)
+    assert out.shape == (1, 3, 640, 640)
+    assert isinstance(pad, tuple)
     # Check inference
     out = model(mock_wildfire_image)
     assert out.shape == (1, 5)
@@ -40,12 +41,12 @@ def test_classifier(mock_wildfire_image):
     assert conf >= 0 and conf <= 1
 
     # Test mask
-    mask = np.ones((640, 384))
+    mask = np.ones((384, 640))
     out = model(mock_wildfire_image, mask)
     assert out.shape == (1, 5)
-    mask = np.zeros((640, 384))
+    mask = np.zeros((384, 640))
     out = model(mock_wildfire_image, mask)
     assert out.shape == (0, 5)
 
     os.remove(model_path)
     os.remove(METADATA_PATH)
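End to end, the updated API can be exercised as in the sketch below; frame.jpg is a placeholder path, and a valid ONNX model is assumed to exist at the default location:

    from PIL import Image
    from pyroengine.vision import Classifier

    model = Classifier(model_path="data/model.onnx", img_size=(640, 640))
    img = Image.open("frame.jpg").convert("RGB")  # hypothetical input frame
    preds = model(img)  # (N, 5) array: x1, y1, x2, y2, conf, all normalized to [0, 1]
    assert preds.ndim == 2 and preds.shape[1] == 5

The mask shape change in the test ((384, 640) rather than (640, 384)) matches numpy's (height, width) convention now that the occlusion mask is compared against normalized box coordinates.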