pyronear · MateoLostanlen · Jul 24, 2023 · Jul 15, 2023 · Jul 18, 2023 · Jul 18, 2023
diff --git a/pyroengine/engine.py b/pyroengine/engine.py
@@ -248,20 +248,27 @@ def predict(self, frame: Image.Image, cam_id: Optional[str] = None) -> float:
 
         if is_day_time(self._cache, frame, self.day_time_strategy):
             # Inference with ONNX
-            pred = float(self.model(frame.convert("RGB")))
+            preds = self.model(frame.convert("RGB"))
+            if len(preds) == 0:
+                conf = 0
+                localization = ""
+            else:
+                conf = float(np.max(preds[:, -1]))
+                localization = str(json.dumps(preds.tolist()))
+
             # Log analysis result
             device_str = f"Camera '{cam_id}' - " if isinstance(cam_id, str) else ""
-            pred_str = "Wildfire detected" if pred >= self.conf_thresh else "No wildfire"
-            logging.info(f"{device_str}{pred_str} (confidence: {pred:.2%})")
+            pred_str = "Wildfire detected" if conf >= self.conf_thresh else "No wildfire"
+            logging.info(f"{device_str}{pred_str} (confidence: {conf:.2%})")
 
             # Alert
 
-            to_be_staged = self._update_states(pred, cam_key)
+            to_be_staged = self._update_states(conf, cam_key)
             if to_be_staged and len(self.api_client) > 0 and isinstance(cam_id, str):
                 # Save the alert in cache to avoid connection issues
-                self._stage_alert(frame_resize, cam_id)
+                self._stage_alert(frame_resize, cam_id, localization)
         else:
-            pred = 0  # return default value
+            conf = 0  # return default value
 
         # Uploading pending alerts
         if len(self._alerts) > 0:
@@ -289,7 +296,7 @@ def predict(self, frame: Image.Image, cam_id: Optional[str] = None) -> float:
                 except ConnectionError:
                     stream.seek(0)  # "Rewind" the stream to the beginning so we can read its content
 
-        return pred
+        return conf
 
     def _upload_frame(self, cam_id: str, media_data: bytes) -> Response:
         """Save frame"""
@@ -303,7 +310,7 @@ def _upload_frame(self, cam_id: str, media_data: bytes) -> Response:
 
         return response
 
-    def _stage_alert(self, frame: Image.Image, cam_id: str) -> None:
+    def _stage_alert(self, frame: Image.Image, cam_id: str, localization: str) -> None:
         # Store information in the queue
         self._alerts.append(
             {
@@ -312,6 +319,7 @@ def _stage_alert(self, frame: Image.Image, cam_id: str) -> None:
                 "ts": datetime.utcnow().isoformat(),
                 "media_id": None,
                 "alert_id": None,
+                "localization": localization,
             }
         )
 
@@ -335,9 +343,10 @@ def _process_alerts(self) -> None:
                     self._alerts[0]["alert_id"] = (
                         self.api_client[cam_id]
                         .send_alert_from_device(
-                            self.latitude,
-                            self.longitude,
-                            self._alerts[0]["media_id"],
+                            lat=self.latitude,
+                            lon=self.longitude,
+                            media_id=self._alerts[0]["media_id"],
+                            localization=self._alerts[0]["localization"],
                         )
                         .json()["id"]
                     )

diff --git a/pyroengine/utils.py b/pyroengine/utils.py
@@ -7,10 +7,19 @@
 import cv2
 import numpy as np
 
-__all__ = ["letterbox"]
+__all__ = ["letterbox", "NMS", "xywh2xyxy"]
 
 
-def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, stride=32):
+def xywh2xyxy(x):  # (center x, center y, width, height, ...) -> (x1, y1, x2, y2, ...)
+    y = np.copy(x)  # work on a copy; entries past index 3 on the last axis are carried over unchanged
+    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x: center x minus half the width
+    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y: center y minus half the height
+    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x: center x plus half the width
+    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y: center y plus half the height
+    return y
+
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=False, stride=32):
     """Letterbox image transform for yolo models
     Args:
         im (np.array): Input image
@@ -51,3 +60,50 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, stride
     im_b[top : top + h, left : left + w, :] = im
 
     return im_b.astype("uint8")
+
+
+def box_iou(box1, box2, eps=1e-7):
+    """
+    Calculate intersection-over-union (IoU) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+
+    Args:
+        box1 (np.array): A numpy array of shape (N, 4) representing N bounding boxes.
+        box2 (np.array): A numpy array of shape (M, 4) representing M bounding boxes.
+        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
+
+    Returns:
+        (np.array): An MxN numpy array where element [j, i] is the IoU of box2[j] and box1[i].
+    """
+    # Corners are split into (K, 2) halves; b*[:, None, :] puts box2 on axis 0, so results are indexed [box2, box1]
+    (a1, a2), (b1, b2) = np.split(box1, 2, 1), np.split(box2, 2, 1)
+    inter = (np.minimum(a2, b2[:, None, :]) - np.maximum(a1, b1[:, None, :])).clip(0).prod(2)
+
+    # IoU = inter / (area1 + area2 - inter)
+    return inter / ((a2 - a1).prod(1) + (b2 - b1).prod(1)[:, None] - inter + eps)
+
+
+def NMS(boxes, overlapThresh=0):
+    """Non maximum suppression
+
+    Args:
+        boxes (np.array): A numpy array of shape (N, 5) representing N bounding boxes in (x1, y1, x2, y2, conf) format
+        overlapThresh (float, optional): iou threshold above which two boxes count as overlapping. Defaults to 0.
+
+    Returns:
+        boxes: Boxes after NMS, sorted by ascending last column (conf), or an empty list if no boxes are given
+    """
+    # Return an empty list, if no boxes given (checked first: the column indexing below needs a 2D array)
+    if len(boxes) == 0:
+        return []
+    boxes = boxes[boxes[:, -1].argsort()]  # ascending confidence
+    indices = np.arange(len(boxes))
+    rr = box_iou(boxes[:, :4], boxes[:, :4])
+    # Visit boxes in sorted order; drop box i when its IoU with any not-yet-removed box exceeds the threshold
+    for i in range(len(boxes)):
+        temp_indices = indices[indices != i]
+        if np.any(rr[i, temp_indices] > overlapThresh):
+            indices = indices[indices != i]
+
+    return boxes[indices]
diff --git a/pyroengine/vision.py b/pyroengine/vision.py
@@ -11,7 +11,7 @@
 import onnxruntime
 from PIL import Image
 
-from .utils import letterbox
+from .utils import NMS, letterbox, xywh2xyxy
 
 __all__ = ["Classifier"]
 
@@ -29,16 +29,17 @@ class Classifier:
         model_path: model path
     """
 
-    def __init__(self, model_path: Optional[str] = "data/model.onnx") -> None:
+    def __init__(self, model_path: Optional[str] = "data/model.onnx", img_size=(384, 640)) -> None:
         # Download model if not available
         if not os.path.isfile(model_path):
             os.makedirs(os.path.split(model_path)[0], exist_ok=True)
             print(f"Downloading model from {MODEL_URL} ...")
             urllib.request.urlretrieve(MODEL_URL, model_path)
 
         self.ort_session = onnxruntime.InferenceSession(model_path)
+        self.img_size = img_size
 
-    def preprocess_image(self, pil_img: Image.Image, img_size=(640, 384)) -> np.ndarray:
+    def preprocess_image(self, pil_img: Image.Image) -> np.ndarray:
         """Preprocess an image for inference
 
         Args:
@@ -49,7 +50,7 @@ def preprocess_image(self, pil_img: Image.Image, img_size=(640, 384)) -> np.ndar
             the resized and normalized image of shape (1, C, H, W)
         """
 
-        np_img = letterbox(np.array(pil_img))  # letterbox
+        np_img = letterbox(np.array(pil_img), self.img_size)  # letterbox
         np_img = np.expand_dims(np_img.astype("float"), axis=0)
         np_img = np.ascontiguousarray(np_img.transpose((0, 3, 1, 2)))  # BHWC to BCHW
         np_img = np_img.astype("float32") / 255
@@ -60,8 +61,16 @@ def __call__(self, pil_img: Image.Image) -> np.ndarray:
         np_img = self.preprocess_image(pil_img)
 
         # ONNX inference
-        y = self.ort_session.run(["output0"], {"images": np_img})[0]
-        # Non maximum suppression need to be added here when we will use the location information
-        # let's avoid useless compute for now
-
-        return np.max(y[0, :, 4])
+        y = self.ort_session.run(["output0"], {"images": np_img})[0][0]
+        # Post processing
+        y = np.transpose(y)
+        y = xywh2xyxy(y)
+        # Sort by confidence
+        y = y[y[:, 4].argsort()]
+        y = NMS(y)
+        # Normalize preds
+        if len(y) > 0:
+            y[:, :4:2] /= self.img_size[1]
+            y[:, 1:4:2] /= self.img_size[0]
+
+        return y