diff --git a/Dockerfile b/Dockerfile
index 1788fe34..c90e7222 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,6 +15,9 @@
 COPY ./pyproject.toml /tmp/pyproject.toml
 COPY ./README.md /tmp/README.md
 COPY ./setup.py /tmp/setup.py
+# install git
+RUN apt-get update && apt-get install git -y
+
 COPY ./src/requirements.txt /tmp/requirements.txt
 RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y\
     && pip install --upgrade pip setuptools wheel \
diff --git a/pyproject.toml b/pyproject.toml
index d598903b..ae6b2e3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
     "Pillow>=8.4.0",
     "onnxruntime>=1.10.0,<2.0.0",
     "numpy>=1.19.5,<2.0.0",
-    "pyroclient>=0.1.2",
+    "pyroclient @ git+https://github.com/pyronear/pyro-api.git@bbox#egg=pkg&subdirectory=client",
     "requests>=2.20.0,<3.0.0",
     "opencv-python==4.5.5.64",
 ]
diff --git a/pyroengine/engine.py b/pyroengine/engine.py
index b5a7126e..72ee262c 100644
--- a/pyroengine/engine.py
+++ b/pyroengine/engine.py
@@ -248,20 +248,27 @@ def predict(self, frame: Image.Image, cam_id: Optional[str] = None) -> float:
         if is_day_time(self._cache, frame, self.day_time_strategy):
 
             # Inference with ONNX
-            pred = float(self.model(frame.convert("RGB")))
+            preds = self.model(frame.convert("RGB"))
+            if len(preds) == 0:
+                conf = 0
+                localization = ""
+            else:
+                conf = float(np.max(preds[:, -1]))
+                localization = str(json.dumps(preds.tolist()))
+
             # Log analysis result
             device_str = f"Camera '{cam_id}' - " if isinstance(cam_id, str) else ""
-            pred_str = "Wildfire detected" if pred >= self.conf_thresh else "No wildfire"
-            logging.info(f"{device_str}{pred_str} (confidence: {pred:.2%})")
+            pred_str = "Wildfire detected" if conf >= self.conf_thresh else "No wildfire"
+            logging.info(f"{device_str}{pred_str} (confidence: {conf:.2%})")
 
             # Alert
-            to_be_staged = self._update_states(pred, cam_key)
+            to_be_staged = self._update_states(conf, cam_key)
             if to_be_staged and len(self.api_client) > 0 and isinstance(cam_id, str):
                 # Save the alert in cache to avoid connection issues
-                self._stage_alert(frame_resize, cam_id)
+                self._stage_alert(frame_resize, cam_id, localization)
         else:
-            pred = 0  # return default value
+            conf = 0  # return default value
 
         # Uploading pending alerts
         if len(self._alerts) > 0:
@@ -289,7 +296,7 @@ def predict(self, frame: Image.Image, cam_id: Optional[str] = None) -> float:
             except ConnectionError:
                 stream.seek(0)  # "Rewind" the stream to the beginning so we can read its content
 
-        return pred
+        return float(conf)
 
     def _upload_frame(self, cam_id: str, media_data: bytes) -> Response:
         """Save frame"""
@@ -303,7 +310,7 @@ def _upload_frame(self, cam_id: str, media_data: bytes) -> Response:
 
         return response
 
-    def _stage_alert(self, frame: Image.Image, cam_id: str) -> None:
+    def _stage_alert(self, frame: Image.Image, cam_id: str, localization: str) -> None:
         # Store information in the queue
         self._alerts.append(
             {
@@ -312,6 +319,7 @@ def _stage_alert(self, frame: Image.Image, cam_id: str) -> None:
                 "ts": datetime.utcnow().isoformat(),
                 "media_id": None,
                 "alert_id": None,
+                "localization": localization,
             }
         )
 
@@ -335,9 +343,10 @@ def _process_alerts(self) -> None:
                     self._alerts[0]["alert_id"] = (
                         self.api_client[cam_id]
                         .send_alert_from_device(
-                            self.latitude,
-                            self.longitude,
-                            self._alerts[0]["media_id"],
+                            lat=self.latitude,
+                            lon=self.longitude,
+                            media_id=self._alerts[0]["media_id"],
+                            localization=self._alerts[0]["localization"],
                         )
                         .json()["id"]
                     )
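Note on the new `predict` flow above: the detector output is reduced to a single confidence (the highest box score) plus a JSON-serialized `localization` string that travels with the staged alert. A minimal standalone sketch of that reduction, assuming `preds` is the (N, 5) box array returned by the classifier (values below are illustrative):

```python
import json

import numpy as np

# Illustrative (N, 5) detector output: x1, y1, x2, y2, conf
preds = np.array([[0.10, 0.20, 0.30, 0.40, 0.87]])

if len(preds) == 0:
    conf, localization = 0.0, ""
else:
    conf = float(np.max(preds[:, -1]))         # highest box confidence drives the alert state
    localization = json.dumps(preds.tolist())  # boxes serialized for the alert payload

print(conf)          # 0.87
print(localization)  # [[0.1, 0.2, 0.3, 0.4, 0.87]]
```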
diff --git a/pyroengine/utils.py b/pyroengine/utils.py
index 730ba0fc..f1d3dc51 100644
--- a/pyroengine/utils.py
+++ b/pyroengine/utils.py
@@ -7,10 +7,21 @@
 import cv2
 import numpy as np
 
-__all__ = ["letterbox"]
+__all__ = ["letterbox", "nms", "xywh2xyxy"]
 
 
-def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, stride=32):
+def xywh2xyxy(x: np.array):
+    y = np.copy(x)
+    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
+    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
+    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
+    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
+    return y
+
+
+def letterbox(
+    im: np.array, new_shape: tuple = (640, 640), color: tuple = (114, 114, 114), auto: bool = False, stride: int = 32
+):
     """Letterbox image transform for yolo models
 
     Args:
         im (np.array): Input image
@@ -51,3 +62,50 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, stride
         im_b[top : top + h, left : left + w, :] = im
 
     return im_b.astype("uint8")
+
+
+def box_iou(box1: np.array, box2: np.array, eps: float = 1e-7):
+    """
+    Calculate intersection-over-union (IoU) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+
+    Args:
+        box1 (np.array): A numpy array of shape (N, 4) representing N bounding boxes.
+        box2 (np.array): A numpy array of shape (M, 4) representing M bounding boxes.
+        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
+
+    Returns:
+        (np.array): An NxM numpy array containing the pairwise IoU values for every element in box1 and box2.
+    """
+
+    (a1, a2), (b1, b2) = np.split(box1, 2, 1), np.split(box2, 2, 1)
+    inter = (np.minimum(a2, b2[:, None, :]) - np.maximum(a1, b1[:, None, :])).clip(0).prod(2)
+
+    # IoU = inter / (area1 + area2 - inter)
+    return inter / ((a2 - a1).prod(1) + (b2 - b1).prod(1)[:, None] - inter + eps)
+
+
+def nms(boxes: np.array, overlapThresh: int = 0):
+    """Non maximum suppression
+
+    Args:
+        boxes (np.array): A numpy array of shape (N, 5) representing N bounding boxes in (x1, y1, x2, y2, conf) format
+        overlapThresh (int, optional): iou threshold. Defaults to 0.
+
+    Returns:
+        boxes: Boxes after NMS
+    """
+    # Return an empty list, if no boxes given
+    boxes = boxes[boxes[:, -1].argsort()]
+    if len(boxes) == 0:
+        return []
+
+    indices = np.arange(len(boxes))
+    rr = box_iou(boxes[:, :4], boxes[:, :4])
+    for i, box in enumerate(boxes):
+        temp_indices = indices[indices != i]
+        if np.any(rr[i, temp_indices] > overlapThresh):
+            indices = indices[indices != i]
+
+    return boxes[indices]
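A quick sanity check of the new box utilities, with made-up boxes: two candidates overlap heavily and the lower-confidence one should be dropped by `nms`.

```python
import numpy as np

from pyroengine.utils import nms, xywh2xyxy

# Three boxes in (x_center, y_center, w, h, conf) format; values are made up
boxes = np.array([
    [100.0, 100.0, 50.0, 50.0, 0.90],
    [102.0, 101.0, 52.0, 48.0, 0.60],  # overlaps the first box, lower confidence
    [300.0, 200.0, 40.0, 40.0, 0.80],
])
boxes[:, :4] = xywh2xyxy(boxes[:, :4])  # convert to (x1, y1, x2, y2)

kept = nms(boxes)
print(kept.shape)  # (2, 5): the overlapping low-confidence box is suppressed
```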
diff --git a/pyroengine/vision.py b/pyroengine/vision.py
index d9673b64..53e2d2ec 100644
--- a/pyroengine/vision.py
+++ b/pyroengine/vision.py
@@ -11,11 +11,11 @@
 import onnxruntime
 from PIL import Image
 
-from .utils import letterbox
+from .utils import letterbox, nms, xywh2xyxy
 
 __all__ = ["Classifier"]
 
-MODEL_URL = "https://github.com/pyronear/pyro-vision/releases/download/v0.2.0/yolov5s_v002.onnx"
+MODEL_URL = "https://github.com/pyronear/pyro-vision/releases/download/v0.2.0/yolov8s_v001.onnx"
 
 
 class Classifier:
@@ -29,7 +29,7 @@ class Classifier:
         model_path: model path
     """
 
-    def __init__(self, model_path: Optional[str] = "data/model.onnx") -> None:
+    def __init__(self, model_path: Optional[str] = "data/model.onnx", img_size: tuple = (384, 640)) -> None:
         # Download model if not available
         if not os.path.isfile(model_path):
             os.makedirs(os.path.split(model_path)[0], exist_ok=True)
@@ -37,8 +37,9 @@ def __init__(self, model_path: Optional[str] = "data/model.onnx") -> None:
             urllib.request.urlretrieve(MODEL_URL, model_path)
 
         self.ort_session = onnxruntime.InferenceSession(model_path)
+        self.img_size = img_size
 
-    def preprocess_image(self, pil_img: Image.Image, img_size=(640, 384)) -> np.ndarray:
+    def preprocess_image(self, pil_img: Image.Image) -> np.ndarray:
         """Preprocess an image for inference
 
         Args:
@@ -49,7 +50,7 @@ def preprocess_image(self, pil_img: Image.Image, img_size=(640, 384)) -> np.ndar
             the resized and normalized image of shape (1, C, H, W)
         """
 
-        np_img = letterbox(np.array(pil_img))  # letterbox
+        np_img = letterbox(np.array(pil_img), self.img_size)  # letterbox
         np_img = np.expand_dims(np_img.astype("float"), axis=0)
         np_img = np.ascontiguousarray(np_img.transpose((0, 3, 1, 2)))  # BHWC to BCHW
         np_img = np_img.astype("float32") / 255
@@ -60,8 +61,18 @@ def __call__(self, pil_img: Image.Image) -> np.ndarray:
         np_img = self.preprocess_image(pil_img)
 
         # ONNX inference
-        y = self.ort_session.run(["output0"], {"images": np_img})[0]
-        # Non maximum suppression need to be added here when we will use the location information
-        # let's avoid useless compute for now
-
-        return np.max(y[0, :, 4])
+        y = self.ort_session.run(["output0"], {"images": np_img})[0][0]
+        # Drop low conf for speed-up
+        y = y[:, y[-1, :] > 0.05]
+        # Post processing
+        y = np.transpose(y)
+        y = xywh2xyxy(y)
+        # Sort by confidence
+        y = y[y[:, 4].argsort()]
+        y = nms(y)
+        # Normalize preds
+        if len(y) > 0:
+            y[:, :4:2] /= self.img_size[1]
+            y[:, 1:4:2] /= self.img_size[0]
+
+        return y
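One detail worth calling out in the new `__call__`: the strided slices normalize x and y coordinates separately against the inference size. A standalone illustration with a made-up box in letterboxed pixel coordinates:

```python
import numpy as np

img_size = (384, 640)  # (height, width), the default passed to Classifier.__init__

y = np.array([[64.0, 96.0, 320.0, 192.0, 0.9]])  # one (x1, y1, x2, y2, conf) box in pixels
y[:, :4:2] /= img_size[1]   # columns 0 and 2 (x1, x2) divided by the width
y[:, 1:4:2] /= img_size[0]  # columns 1 and 3 (y1, y2) divided by the height

print(y)  # [[0.1  0.25 0.5  0.5  0.9 ]]
```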
diff --git a/src/run.py b/src/run.py
index 2356f2a8..9bdf5fb9 100644
--- a/src/run.py
+++ b/src/run.py
@@ -96,7 +96,7 @@ def main(args):
     parser.add_argument(
         "--alert_relaxation",
         type=int,
-        default=2,
+        default=3,
        help="Number of consecutive positive detections required to send the first alert",
     )
     parser.add_argument(
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 1b8f648b..c1e3d17d 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -16,7 +16,7 @@ def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image):
 
     # Cache saving
     _ts = datetime.utcnow().isoformat()
-    engine._stage_alert(mock_wildfire_image, 0)
+    engine._stage_alert(mock_wildfire_image, 0, localization="dummy")
     assert len(engine._alerts) == 1
     assert engine._alerts[0]["ts"] < datetime.utcnow().isoformat() and _ts < engine._alerts[0]["ts"]
     assert engine._alerts[0]["media_id"] is None
@@ -37,16 +37,17 @@ def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image):
     engine._dump_cache()
 
     # Cache dump loading
-    engine = Engine(cache_folder=folder + "model.onnx")
+    engine = Engine(cache_folder=folder)
     assert len(engine._alerts) == 1
     engine.clear_cache()
 
     # inference
-    engine = Engine(alert_relaxation=3, cache_folder=folder + "model.onnx")
+    engine = Engine(alert_relaxation=3, cache_folder=folder)
     out = engine.predict(mock_forest_image)
     assert isinstance(out, float) and 0 <= out <= 1
     assert engine._states["-1"]["consec"] == 0
     out = engine.predict(mock_wildfire_image)
+    assert isinstance(out, float) and 0 <= out <= 1
     assert engine._states["-1"]["consec"] == 1
 
     # Alert relaxation
diff --git a/tests/test_vision.py b/tests/test_vision.py
index 70b76f6a..a0733f49 100644
--- a/tests/test_vision.py
+++ b/tests/test_vision.py
@@ -12,4 +12,6 @@ def test_classifier(mock_wildfire_image):
     assert out.shape == (1, 3, 384, 640)
     # Check inference
     out = model(mock_wildfire_image)
-    assert out >= 0 and out <= 1
+    assert out.shape == (1, 5)
+    conf = np.max(out[:, 4])
+    assert conf >= 0 and conf <= 1
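As the updated test hints, the classifier now returns a box array rather than a bare score. A hedged usage sketch of the new output contract (the frame here is synthetic, just to keep the snippet self-contained):

```python
import numpy as np
from PIL import Image

from pyroengine.vision import Classifier

model = Classifier()  # downloads the YOLOv8 ONNX release to data/model.onnx if missing
frame = Image.fromarray(np.zeros((720, 1280, 3), dtype=np.uint8))  # synthetic frame

out = model(frame)  # (N, 5) array of normalized x1, y1, x2, y2, conf boxes
conf = float(np.max(out[:, 4])) if len(out) > 0 else 0.0
assert 0.0 <= conf <= 1.0
```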