ID and HOTA metric performance improvements (#23)
* Improve ID metrics to use less memory

* Improve HOTA and ID metrics computation

* One more simplification
tadejsv authored Oct 5, 2022
1 parent 4188cf4 commit ce6df1f
Showing 5 changed files with 117 additions and 109 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 
+## [0.1.8] - 2022-10-05
+
+## Changed
+
+* Improve HOTA metrics to use less memory ([#23](https://github.com/tadejsv/EvalDeT/pull/23))
+* Improve ID metrics to use less memory ([#23](https://github.com/tadejsv/EvalDeT/pull/23))
+
 ## [0.1.7] - 2022-10-02
 
 ## Fixed
2 changes: 1 addition & 1 deletion src/evaldet/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.7"
+__version__ = "0.1.8"
 
 from .metrics import MOTMetrics  # noqa: F401
 from .tracks import Tracks  # noqa: F401
149 changes: 67 additions & 82 deletions src/evaldet/mot_metrics/hota.py
@@ -2,10 +2,10 @@
 import typing as t
 
 import numpy as np
-from scipy import sparse
 from scipy.optimize import linear_sum_assignment
 
 from ..tracks import Tracks
+from ..utils import sparse
 from .base import MOTMetricBase
 
 _EPS = 1 / 1000
@@ -23,21 +23,14 @@ class HOTAResults(t.TypedDict):
     LocA_alpha: np.ndarray
 
 
-def _create_coo_array(
-    vals_list: t.Dict[t.Tuple[int, int], int], shape: t.Tuple[int, int]
-) -> sparse.coo_array:
-    row_inds = np.array(tuple(x[0] for x in vals_list.keys()))
-    col_inds = np.array(tuple(x[1] for x in vals_list.keys()))
-    vals = np.array(tuple(vals_list.values()))
-
-    return sparse.coo_array((vals, (row_inds, col_inds)), shape=shape)
-
-
 class HOTAMetrics(MOTMetricBase):
     def _calculate_hota_metrics(
         self, ground_truth: Tracks, hypotheses: Tracks
     ) -> HOTAResults:
 
+        alphas = np.arange(0.05, 0.96, 0.05)  # from 0.05 to 0.95 inclusive
+        all_frames = sorted(set(ground_truth.frames).intersection(hypotheses.frames))
+
         gts = tuple(ground_truth.ids_count.keys())
         gts_counts = tuple(ground_truth.ids_count.values())
         gts_id_ind_dict = {_id: ind for ind, _id in enumerate(gts)}
@@ -47,83 +40,75 @@ def _calculate_hota_metrics(
         hyps_id_ind_dict = {_id: ind for ind, _id in enumerate(hyps)}
 
         n_gt, n_hyp = len(gts), len(hyps)
 
-        alphas = np.arange(0.05, 0.96, 0.05)  # from 0.05 to 0.95 inclusive
-
-        # The arrays should all have the shape [n_alphas, n_gt, n_hyp]
-        TPA_max = np.zeros((len(alphas), n_gt, n_hyp), dtype=np.int32)
-        FPA_max = np.tile(np.tile(hyps_counts, (n_gt, 1)), (len(alphas), 1, 1))
-        FNA_max = np.tile(np.tile(gts_counts, (n_hyp, 1)).T, (len(alphas), 1, 1))
-        TPA_max_vals: t.List[t.Dict[t.Tuple[int, int], int]] = [
-            co.defaultdict(int) for _ in range(len(alphas))
-        ]
-
-        TPA, FPA, FNA = TPA_max.copy(), FPA_max.copy(), FNA_max.copy()
-        FP = np.ones((len(alphas),)) * sum(hyps_counts)
-        FN = np.ones((len(alphas),)) * sum(gts_counts)
-        LocAs = np.zeros((len(alphas),))  # Accumulator of similarities
-
-        # Do the optimistic matching - allow multiple matches per gt/hyp in same frame
-        for frame in sorted(set(ground_truth.frames).intersection(hypotheses.frames)):
-            dist_matrix = self._get_iou_frame(frame)
-
-            gt_frame_inds = [gts_id_ind_dict[_id] for _id in ground_truth[frame].ids]
-            hyp_frame_inds = [hyps_id_ind_dict[_id] for _id in hypotheses[frame].ids]
-
-            for a_ind in range(len(alphas)):
-                for row_ind, col_ind in np.argwhere(dist_matrix < alphas[a_ind]):
-                    TPA_max_vals[a_ind][
-                        (gt_frame_inds[row_ind], hyp_frame_inds[col_ind])
-                    ] += 1
-
-        for a_ind in range(len(alphas)):
-            TPA_max[a_ind] = _create_coo_array(
-                TPA_max_vals[a_ind], (n_gt, n_hyp)
-            ).toarray()
-
-        # Compute optimistic A_max, to be used for actual matching
-        A_max = TPA_max / (FNA_max + FPA_max - TPA_max)
-
-        # Do the actual matching
-        TPA_vals: t.List[t.Dict[t.Tuple[int, int], int]] = [
-            co.defaultdict(int) for _ in range(len(alphas))
-        ]
-        for frame in sorted(set(ground_truth.frames).intersection(hypotheses.frames)):
-            dist_matrix = self._get_iou_frame(frame)
-            dist_cost = (1 - dist_matrix) * _EPS
-
-            gt_ids_f = ground_truth[frame].ids
-            hyp_ids_f = hypotheses[frame].ids
-            gt_frame_inds = [gts_id_ind_dict[_id] for _id in gt_ids_f]
-            hyp_frame_inds = [hyps_id_ind_dict[_id] for _id in hyp_ids_f]
-
-            for a_ind in range(len(alphas)):
-                opt_matrix = ((dist_matrix < alphas[a_ind]) / _EPS).astype(np.float64)
-                opt_matrix += A_max[a_ind][np.ix_(gt_frame_inds, hyp_frame_inds)]
+        FP, FN = sum(hyps_counts), sum(gts_counts)
+
+        DetAs = np.zeros_like(alphas)
+        AssAs = np.zeros_like(alphas)
+        LocAs = np.zeros_like(alphas)
+
+        for a_ind, alpha in enumerate(alphas):
+            # The arrays should all have the shape [n_gt, n_hyp]
+            FPA_max = np.tile(hyps_counts, (n_gt, 1))
+            FNA_max = np.tile(gts_counts, (n_hyp, 1)).T
+            TPA_max_vals: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
+
+            FPA, FNA = FPA_max.copy(), FNA_max.copy()
+            locs = 0.0  # Accumulator of similarities
+
+            # Do the optimistic matching - allow multiple matches per gt/hyp in the
+            # same frame
+            for frame in all_frames:
+                dist_matrix = self._get_iou_frame(frame)
+
+                gt_frame_inds = [
+                    gts_id_ind_dict[_id] for _id in ground_truth[frame].ids
+                ]
+                hyp_frame_inds = [
+                    hyps_id_ind_dict[_id] for _id in hypotheses[frame].ids
+                ]
+
+                for row_ind, col_ind in np.argwhere(dist_matrix < alpha):
+                    TPA_max_vals[(gt_frame_inds[row_ind], hyp_frame_inds[col_ind])] += 1
+
+            TPA_max = sparse.create_coo_array(TPA_max_vals, (n_gt, n_hyp)).toarray()
+
+            # Compute optimistic A_max, to be used for actual matching
+            A_max = TPA_max / (FNA_max + FPA_max - TPA_max)
+
+            # Do the actual matching
+            TPA_vals: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
+            for frame in all_frames:
+                dist_matrix = self._get_iou_frame(frame)
+                dist_cost = (1 - dist_matrix) * _EPS
+
+                gt_ids_f = ground_truth[frame].ids
+                hyp_ids_f = hypotheses[frame].ids
+                gt_frame_inds = [gts_id_ind_dict[_id] for _id in gt_ids_f]
+                hyp_frame_inds = [hyps_id_ind_dict[_id] for _id in hyp_ids_f]
+
+                opt_matrix = ((dist_matrix < alpha) / _EPS).astype(np.float64)
+                opt_matrix += A_max[np.ix_(gt_frame_inds, hyp_frame_inds)]
                 opt_matrix += dist_cost
 
                 # Calculate matching as a LAP
                 matching_inds = linear_sum_assignment(opt_matrix, maximize=True)
 
                 for row_ind, col_ind in zip(*matching_inds):
-                    if dist_matrix[row_ind, col_ind] < alphas[a_ind]:
-                        TPA_vals[a_ind][
-                            (gt_frame_inds[row_ind], hyp_frame_inds[col_ind])
-                        ] += 1
-                        LocAs[a_ind] += 1 - dist_matrix[row_ind, col_ind]
-
-        for a_ind in range(len(alphas)):
-            TPA[a_ind] = _create_coo_array(TPA_vals[a_ind], (n_gt, n_hyp)).toarray()
-
-        # Compute proper scores
-        TP = TPA.sum(axis=(1, 2))
-        A = TPA / (FNA + FPA - TPA)
-        DetAs = TP / (FN + FP - TP)
-        AssAs = (TPA * A).sum(axis=(1, 2)) / np.maximum(TP, 1)
-        HOTAs = np.sqrt(DetAs * AssAs)
+                    if dist_matrix[row_ind, col_ind] < alpha:
+                        TPA_vals[(gt_frame_inds[row_ind], hyp_frame_inds[col_ind])] += 1
+                        locs += 1 - dist_matrix[row_ind, col_ind]
+
+            TPA = sparse.create_coo_array(TPA_vals, (n_gt, n_hyp)).toarray()
 
-        # If no matches -> full similarity [strange default]
-        LocAs = np.maximum(LocAs, 1e-10) / np.maximum(TP, 1e-10)
+            # Compute proper scores
+            TP = TPA.sum()
+            A = TPA / (FNA + FPA - TPA)
+            DetAs[a_ind] = TP / (FN + FP - TP)
+            AssAs[a_ind] = (TPA * A).sum() / max(TP, 1)
+
+            # If no matches -> full similarity [strange default]
+            LocAs[a_ind] = np.maximum(locs, 1e-10) / np.maximum(TP, 1e-10)
+
+        HOTAs = np.sqrt(DetAs * AssAs)
 
         return {
             "HOTA": HOTAs.mean(),
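The core of the HOTA change: the old code materialized six dense `[n_alphas, n_gt, n_hyp]` accumulators at once (`TPA_max`, `FPA_max`, `FNA_max` and their `TPA`, `FPA`, `FNA` copies), while the new loop only ever holds `[n_gt, n_hyp]` slices for the current alpha, accumulating per-frame counts in a plain dict until they are needed. A rough sketch of the peak-memory difference, with made-up track counts and an int32 itemsize assumed for illustration (not measurements from this commit):

```python
import numpy as np

# Hypothetical sizes, purely for illustration
n_alphas, n_gt, n_hyp = 19, 5_000, 5_000
itemsize = np.dtype(np.int32).itemsize  # 4 bytes

# Before: TPA_max, FPA_max, FNA_max and the TPA, FPA, FNA copies all
# lived simultaneously as dense [n_alphas, n_gt, n_hyp] arrays
peak_before = 6 * n_alphas * n_gt * n_hyp * itemsize

# After: the same six accumulators exist only as [n_gt, n_hyp] slices
# inside one iteration of the alpha loop
peak_after = 6 * n_gt * n_hyp * itemsize

print(f"~{peak_before / 2**30:.1f} GiB -> ~{peak_after / 2**30:.2f} GiB")
# ~10.6 GiB -> ~0.56 GiB: the peak shrinks by roughly a factor of n_alphas
```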
46 changes: 20 additions & 26 deletions src/evaldet/mot_metrics/identity.py
@@ -1,9 +1,11 @@
+import collections as co
 import typing as t
 
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 
 from ..tracks import Tracks
+from ..utils import sparse
 from .base import MOTMetricBase
 
 
@@ -20,50 +22,42 @@ class IDMetrics(MOTMetricBase):
     def _calculate_id_metrics(
         self, ground_truth: Tracks, hypotheses: Tracks, dist_threshold: float = 0.5
     ) -> IDResults:
-
         gts = tuple(ground_truth.ids_count.keys())
         gts_id_ind_dict = {_id: ind for ind, _id in enumerate(gts)}
-        gts_counts = tuple(ground_truth.ids_count.values())
 
         hyps = tuple(hypotheses.ids_count.keys())
         hyps_id_ind_dict = {_id: ind for ind, _id in enumerate(hyps)}
-        hyps_counts = tuple(hypotheses.ids_count.values())
-        n_gt, n_hyp = len(gts), len(hyps)
 
-        # The "real" shape is [n_gt, n_hyp], the rest is for fictional
-        # entries that are needed for FP and FN matrix to make the
-        # LAP problem minimize the actual loss, including for unmatched entries
-        matching = np.zeros((max(n_gt, n_hyp), max(n_gt, n_hyp)), dtype=np.int32)
+        gts_counts = np.array(tuple(ground_truth.ids_count.values()), dtype=np.int32)
+        hyps_counts = np.array(tuple(hypotheses.ids_count.values()), dtype=np.int32)
 
+        matches: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
         for frame in sorted(set(ground_truth.frames).intersection(hypotheses.frames)):
            dist_matrix = self._get_iou_frame(frame)
            gt_frame_inds = [gts_id_ind_dict[_id] for _id in ground_truth[frame].ids]
            htp_frame_inds = [hyps_id_ind_dict[_id] for _id in hypotheses[frame].ids]
 
            for gt_ind, hyp_ind in np.argwhere(dist_matrix < dist_threshold):
-                matching[gt_frame_inds[gt_ind], htp_frame_inds[hyp_ind]] += 1
-
-        fn_matrix, fp_matrix = np.zeros_like(matching), np.zeros_like(matching)
-        fp_matrix[:, :n_hyp] = np.tile(hyps_counts, (max(n_hyp, n_gt), 1))
-        fn_matrix[:n_gt, :] = np.tile(gts_counts, (max(n_hyp, n_gt), 1)).T
-
-        cost_matrix = fp_matrix + fn_matrix - 2 * matching
+                matches[(gt_frame_inds[gt_ind], htp_frame_inds[hyp_ind])] += 1
 
-        # Calculate matching as a LAP, get FN, FP and TP from matched entries
-        matching_inds = linear_sum_assignment(cost_matrix)
-        true_positive = matching[matching_inds].sum()
-        false_negative = fn_matrix[matching_inds].sum() - true_positive
-        false_positive = fp_matrix[matching_inds].sum() - true_positive
+        # row_m_inds, col_m_inds = linear_sum_assignment(cost_matrix)
+        matches_matrix = sparse.create_coo_array(matches, (len(gts), len(hyps)))
+        matches_array = matches_matrix.toarray()
+        row_m_inds, col_m_inds = linear_sum_assignment(matches_array, maximize=True)
 
         # Calculate the final results
-        idp = true_positive / (true_positive + false_positive)
-        idr = true_positive / (true_positive + false_negative)
-        idf1 = 2 * true_positive / (2 * true_positive + false_positive + false_negative)
+        TPs = matches_array[row_m_inds, col_m_inds].sum()
+        FNs = gts_counts.sum() - TPs
+        FPs = hyps_counts.sum() - TPs
+
+        idp = TPs / (TPs + FPs)
+        idr = TPs / (TPs + FNs)
+        idf1 = 2 * TPs / (2 * TPs + FPs + FNs)
 
         return {
-            "IDTP": true_positive,
-            "IDFP": false_positive,
-            "IDFN": false_negative,
+            "IDTP": TPs,
+            "IDFP": FPs,
+            "IDFN": FNs,
             "IDP": idp,
             "IDR": idr,
             "IDF1": idf1,
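The ID-metrics simplification rests on an equivalence: the old code padded `fp`/`fn` matrices to a square shape and minimized `fp + fn - 2 * matching` as a LAP, but under a full one-to-one assignment every row and column is selected exactly once, so the `fp + fn` part of the cost is a constant and minimizing it is the same as maximizing the matched-frame count directly, which is what `linear_sum_assignment(..., maximize=True)` now does. A minimal sketch of the resulting IDF1 computation on toy data (all numbers invented for illustration, not taken from the library):

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

# matches[i, j] = frames in which gt track i was matched to hyp track j
matches = np.array([[8, 1], [0, 5]])
gts_counts = np.array([10, 6])  # total detections per gt track
hyps_counts = np.array([9, 7])  # total detections per hyp track

# One-to-one track assignment maximizing the total matched frames
row_m_inds, col_m_inds = linear_sum_assignment(matches, maximize=True)

TPs = matches[row_m_inds, col_m_inds].sum()  # 8 + 5 = 13
FNs = gts_counts.sum() - TPs                 # 16 - 13 = 3
FPs = hyps_counts.sum() - TPs                # 16 - 13 = 3

idf1 = 2 * TPs / (2 * TPs + FPs + FNs)
print(idf1)  # 26 / 32 = 0.8125
```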
22 changes: 22 additions & 0 deletions src/evaldet/utils/sparse.py
@@ -0,0 +1,22 @@
+import typing as t
+
+import numpy as np
+from scipy import sparse
+
+
+def create_coo_array(
+    vals_dict: t.Dict[t.Tuple[int, int], int], shape: t.Tuple[int, int]
+) -> sparse.coo_array:
+    """Create a sparse COO array.
+
+    Args:
+        vals_dict: A dictionary with values. The key should be a tuple of
+            ``(row_ind, col_ind)``, and the value should be the entry for the cell
+            at that index.
+        shape: Shape of the new array: ``(n_rows, n_cols)``
+    """
+    row_inds = np.array(tuple(x[0] for x in vals_dict.keys()))
+    col_inds = np.array(tuple(x[1] for x in vals_dict.keys()))
+    vals = np.array(tuple(vals_dict.values()))
+
+    return sparse.coo_array((vals, (row_inds, col_inds)), shape=shape)
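
A small usage sketch for the new helper (toy values assumed; the `defaultdict` accumulation mirrors how `hota.py` and `identity.py` build their match counts):

```python
import collections as co
import typing as t

from evaldet.utils.sparse import create_coo_array

# Accumulate per-(gt, hyp) match counts, then densify once at the end
counts: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
counts[(0, 1)] += 1
counts[(0, 1)] += 1
counts[(2, 0)] += 1

arr = create_coo_array(counts, (3, 2))
print(arr.toarray())
# [[0 2]
#  [0 0]
#  [1 0]]
```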
