ID and HOTA metric performance improvements (#23)
* Improve ID metrics to use less memory

* Improve HOTA and ID metrics computation

* One more simplification
tadejsv authored Oct 5, 2022
1 parent 4188cf4 commit ce6df1f
Showing 5 changed files with 117 additions and 109 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 
+## [0.1.8] - 2022-10-05
+
+## Changed
+
+* Improve HOTA metrics to use less memory ([#23](https://github.com/tadejsv/EvalDeT/pull/23))
+* Improve ID metrics to use less memory ([#23](https://github.com/tadejsv/EvalDeT/pull/23))
+
 ## [0.1.7] - 2022-10-02
 
 ## Fixed
2 changes: 1 addition & 1 deletion src/evaldet/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.7"
+__version__ = "0.1.8"
 
 from .metrics import MOTMetrics  # noqa: F401
 from .tracks import Tracks  # noqa: F401
149 changes: 67 additions & 82 deletions src/evaldet/mot_metrics/hota.py
@@ -2,10 +2,10 @@
 import typing as t
 
 import numpy as np
-from scipy import sparse
 from scipy.optimize import linear_sum_assignment
 
 from ..tracks import Tracks
+from ..utils import sparse
 from .base import MOTMetricBase
 
 _EPS = 1 / 1000
@@ -23,21 +23,14 @@ class HOTAResults(t.TypedDict):
     LocA_alpha: np.ndarray
 
 
-def _create_coo_array(
-    vals_list: t.Dict[t.Tuple[int, int], int], shape: t.Tuple[int, int]
-) -> sparse.coo_array:
-    row_inds = np.array(tuple(x[0] for x in vals_list.keys()))
-    col_inds = np.array(tuple(x[1] for x in vals_list.keys()))
-    vals = np.array(tuple(vals_list.values()))
-
-    return sparse.coo_array((vals, (row_inds, col_inds)), shape=shape)
-
-
 class HOTAMetrics(MOTMetricBase):
     def _calculate_hota_metrics(
         self, ground_truth: Tracks, hypotheses: Tracks
     ) -> HOTAResults:
 
+        alphas = np.arange(0.05, 0.96, 0.05)  # from 0.05 to 0.95 inclusive
+        all_frames = sorted(set(ground_truth.frames).intersection(hypotheses.frames))
+
         gts = tuple(ground_truth.ids_count.keys())
         gts_counts = tuple(ground_truth.ids_count.values())
         gts_id_ind_dict = {_id: ind for ind, _id in enumerate(gts)}
@@ -47,83 +40,75 @@ def _calculate_hota_metrics(
         hyps_id_ind_dict = {_id: ind for ind, _id in enumerate(hyps)}
 
         n_gt, n_hyp = len(gts), len(hyps)
 
-        alphas = np.arange(0.05, 0.96, 0.05)  # from 0.05 to 0.95 inclusive
-
-        # The arrays should all have the shape [n_alphas, n_gt, n_hyp]
-        TPA_max = np.zeros((len(alphas), n_gt, n_hyp), dtype=np.int32)
-        FPA_max = np.tile(np.tile(hyps_counts, (n_gt, 1)), (len(alphas), 1, 1))
-        FNA_max = np.tile(np.tile(gts_counts, (n_hyp, 1)).T, (len(alphas), 1, 1))
-        TPA_max_vals: t.List[t.Dict[t.Tuple[int, int], int]] = [
-            co.defaultdict(int) for _ in range(len(alphas))
-        ]
-
-        TPA, FPA, FNA = TPA_max.copy(), FPA_max.copy(), FNA_max.copy()
-        FP = np.ones((len(alphas),)) * sum(hyps_counts)
-        FN = np.ones((len(alphas),)) * sum(gts_counts)
-        LocAs = np.zeros((len(alphas),))  # Accumulator of similarities
-
-        # Do the optimistic matching - allow multiple matches per gt/hyp in same frame
-        for frame in sorted(set(ground_truth.frames).intersection(hypotheses.frames)):
-            dist_matrix = self._get_iou_frame(frame)
-
-            gt_frame_inds = [gts_id_ind_dict[_id] for _id in ground_truth[frame].ids]
-            hyp_frame_inds = [hyps_id_ind_dict[_id] for _id in hypotheses[frame].ids]
-
-            for a_ind in range(len(alphas)):
-                for row_ind, col_ind in np.argwhere(dist_matrix < alphas[a_ind]):
-                    TPA_max_vals[a_ind][
-                        (gt_frame_inds[row_ind], hyp_frame_inds[col_ind])
-                    ] += 1
-
-        for a_ind in range(len(alphas)):
-            TPA_max[a_ind] = _create_coo_array(
-                TPA_max_vals[a_ind], (n_gt, n_hyp)
-            ).toarray()
-
-        # Compute optimistic A_max, to be used for actual matching
-        A_max = TPA_max / (FNA_max + FPA_max - TPA_max)
-
-        # Do the actual matching
-        TPA_vals: t.List[t.Dict[t.Tuple[int, int], int]] = [
-            co.defaultdict(int) for _ in range(len(alphas))
-        ]
-        for frame in sorted(set(ground_truth.frames).intersection(hypotheses.frames)):
-            dist_matrix = self._get_iou_frame(frame)
-            dist_cost = (1 - dist_matrix) * _EPS
-
-            gt_ids_f = ground_truth[frame].ids
-            hyp_ids_f = hypotheses[frame].ids
-            gt_frame_inds = [gts_id_ind_dict[_id] for _id in gt_ids_f]
-            hyp_frame_inds = [hyps_id_ind_dict[_id] for _id in hyp_ids_f]
-
-            for a_ind in range(len(alphas)):
-                opt_matrix = ((dist_matrix < alphas[a_ind]) / _EPS).astype(np.float64)
-                opt_matrix += A_max[a_ind][np.ix_(gt_frame_inds, hyp_frame_inds)]
+        FP, FN = sum(hyps_counts), sum(gts_counts)
+
+        DetAs = np.zeros_like(alphas)
+        AssAs = np.zeros_like(alphas)
+        LocAs = np.zeros_like(alphas)
+
+        for a_ind, alpha in enumerate(alphas):
+            # The arrays should all have the shape [n_gt, n_hyp]
+            FPA_max = np.tile(hyps_counts, (n_gt, 1))
+            FNA_max = np.tile(gts_counts, (n_hyp, 1)).T
+            TPA_max_vals: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
+
+            FPA, FNA = FPA_max.copy(), FNA_max.copy()
+            locs = 0.0  # Accumulator of similarities
+
+            # Do the optimistic matching - allow multiple matches per gt/hyp in the
+            # same frame
+            for frame in all_frames:
+                dist_matrix = self._get_iou_frame(frame)
+
+                gt_frame_inds = [
+                    gts_id_ind_dict[_id] for _id in ground_truth[frame].ids
+                ]
+                hyp_frame_inds = [
+                    hyps_id_ind_dict[_id] for _id in hypotheses[frame].ids
+                ]
+
+                for row_ind, col_ind in np.argwhere(dist_matrix < alpha):
+                    TPA_max_vals[(gt_frame_inds[row_ind], hyp_frame_inds[col_ind])] += 1
+
+            TPA_max = sparse.create_coo_array(TPA_max_vals, (n_gt, n_hyp)).toarray()
+
+            # Compute optimistic A_max, to be used for actual matching
+            A_max = TPA_max / (FNA_max + FPA_max - TPA_max)
+
+            # Do the actual matching
+            TPA_vals: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
+            for frame in all_frames:
+                dist_matrix = self._get_iou_frame(frame)
+                dist_cost = (1 - dist_matrix) * _EPS
+
+                gt_ids_f = ground_truth[frame].ids
+                hyp_ids_f = hypotheses[frame].ids
+                gt_frame_inds = [gts_id_ind_dict[_id] for _id in gt_ids_f]
+                hyp_frame_inds = [hyps_id_ind_dict[_id] for _id in hyp_ids_f]
+
+                opt_matrix = ((dist_matrix < alpha) / _EPS).astype(np.float64)
+                opt_matrix += A_max[np.ix_(gt_frame_inds, hyp_frame_inds)]
                 opt_matrix += dist_cost
 
                 # Calculate matching as a LAP
                 matching_inds = linear_sum_assignment(opt_matrix, maximize=True)
 
                 for row_ind, col_ind in zip(*matching_inds):
-                    if dist_matrix[row_ind, col_ind] < alphas[a_ind]:
-                        TPA_vals[a_ind][
-                            (gt_frame_inds[row_ind], hyp_frame_inds[col_ind])
-                        ] += 1
-                        LocAs[a_ind] += 1 - dist_matrix[row_ind, col_ind]
-
-        for a_ind in range(len(alphas)):
-            TPA[a_ind] = _create_coo_array(TPA_vals[a_ind], (n_gt, n_hyp)).toarray()
-
-        # Compute proper scores
-        TP = TPA.sum(axis=(1, 2))
-        A = TPA / (FNA + FPA - TPA)
-        DetAs = TP / (FN + FP - TP)
-        AssAs = (TPA * A).sum(axis=(1, 2)) / np.maximum(TP, 1)
-        HOTAs = np.sqrt(DetAs * AssAs)
+                    if dist_matrix[row_ind, col_ind] < alpha:
+                        TPA_vals[(gt_frame_inds[row_ind], hyp_frame_inds[col_ind])] += 1
+                        locs += 1 - dist_matrix[row_ind, col_ind]
+
+            TPA = sparse.create_coo_array(TPA_vals, (n_gt, n_hyp)).toarray()
 
-        # If no matches -> full similarity [strange default]
-        LocAs = np.maximum(LocAs, 1e-10) / np.maximum(TP, 1e-10)
+            # Compute proper scores
+            TP = TPA.sum()
+            A = TPA / (FNA + FPA - TPA)
+            DetAs[a_ind] = TP / (FN + FP - TP)
+            AssAs[a_ind] = (TPA * A).sum() / max(TP, 1)
+
+            # If no matches -> full similarity [strange default]
+            LocAs[a_ind] = np.maximum(locs, 1e-10) / np.maximum(TP, 1e-10)
+
+        HOTAs = np.sqrt(DetAs * AssAs)
 
         return {
             "HOTA": HOTAs.mean(),
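The core of the HOTA change: the old code materialized six dense `[n_alphas, n_gt, n_hyp]` accumulators at once (`TPA_max`, `FPA_max`, `FNA_max` and their `TPA`, `FPA`, `FNA` copies), while the new loop only ever holds `[n_gt, n_hyp]` slices for the current alpha, accumulating per-frame counts in a plain dict until they are needed. A rough sketch of the peak-memory difference, with made-up track counts and an int32 itemsize assumed for illustration (not measurements from this commit):

```python
import numpy as np

# Hypothetical sizes, purely for illustration
n_alphas, n_gt, n_hyp = 19, 5_000, 5_000
itemsize = np.dtype(np.int32).itemsize  # 4 bytes

# Before: TPA_max, FPA_max, FNA_max and the TPA, FPA, FNA copies all
# lived simultaneously as dense [n_alphas, n_gt, n_hyp] arrays
peak_before = 6 * n_alphas * n_gt * n_hyp * itemsize

# After: the same six accumulators exist only as [n_gt, n_hyp] slices
# inside one iteration of the alpha loop
peak_after = 6 * n_gt * n_hyp * itemsize

print(f"~{peak_before / 2**30:.1f} GiB -> ~{peak_after / 2**30:.2f} GiB")
# ~10.6 GiB -> ~0.56 GiB: the peak shrinks by roughly a factor of n_alphas
```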
46 changes: 20 additions & 26 deletions src/evaldet/mot_metrics/identity.py
@@ -1,9 +1,11 @@
+import collections as co
 import typing as t
 
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 
 from ..tracks import Tracks
+from ..utils import sparse
 from .base import MOTMetricBase
 
 
@@ -20,50 +22,42 @@ class IDMetrics(MOTMetricBase):
     def _calculate_id_metrics(
         self, ground_truth: Tracks, hypotheses: Tracks, dist_threshold: float = 0.5
     ) -> IDResults:
-
         gts = tuple(ground_truth.ids_count.keys())
         gts_id_ind_dict = {_id: ind for ind, _id in enumerate(gts)}
-        gts_counts = tuple(ground_truth.ids_count.values())
 
         hyps = tuple(hypotheses.ids_count.keys())
         hyps_id_ind_dict = {_id: ind for ind, _id in enumerate(hyps)}
-        hyps_counts = tuple(hypotheses.ids_count.values())
-        n_gt, n_hyp = len(gts), len(hyps)
 
-        # The "real" shape is [n_gt, n_hyp], the rest is for fictional
-        # entries that are needed for FP and FN matrix to make the
-        # LAP problem minimize the actual loss, including for unmatched entries
-        matching = np.zeros((max(n_gt, n_hyp), max(n_gt, n_hyp)), dtype=np.int32)
+        gts_counts = np.array(tuple(ground_truth.ids_count.values()), dtype=np.int32)
+        hyps_counts = np.array(tuple(hypotheses.ids_count.values()), dtype=np.int32)
 
+        matches: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
         for frame in sorted(set(ground_truth.frames).intersection(hypotheses.frames)):
            dist_matrix = self._get_iou_frame(frame)
            gt_frame_inds = [gts_id_ind_dict[_id] for _id in ground_truth[frame].ids]
            htp_frame_inds = [hyps_id_ind_dict[_id] for _id in hypotheses[frame].ids]
 
            for gt_ind, hyp_ind in np.argwhere(dist_matrix < dist_threshold):
-                matching[gt_frame_inds[gt_ind], htp_frame_inds[hyp_ind]] += 1
-
-        fn_matrix, fp_matrix = np.zeros_like(matching), np.zeros_like(matching)
-        fp_matrix[:, :n_hyp] = np.tile(hyps_counts, (max(n_hyp, n_gt), 1))
-        fn_matrix[:n_gt, :] = np.tile(gts_counts, (max(n_hyp, n_gt), 1)).T
-
-        cost_matrix = fp_matrix + fn_matrix - 2 * matching
+                matches[(gt_frame_inds[gt_ind], htp_frame_inds[hyp_ind])] += 1
 
-        # Calculate matching as a LAP, get FN, FP and TP from matched entries
-        matching_inds = linear_sum_assignment(cost_matrix)
-        true_positive = matching[matching_inds].sum()
-        false_negative = fn_matrix[matching_inds].sum() - true_positive
-        false_positive = fp_matrix[matching_inds].sum() - true_positive
+        # row_m_inds, col_m_inds = linear_sum_assignment(cost_matrix)
+        matches_matrix = sparse.create_coo_array(matches, (len(gts), len(hyps)))
+        matches_array = matches_matrix.toarray()
+        row_m_inds, col_m_inds = linear_sum_assignment(matches_array, maximize=True)
 
         # Calculate the final results
-        idp = true_positive / (true_positive + false_positive)
-        idr = true_positive / (true_positive + false_negative)
-        idf1 = 2 * true_positive / (2 * true_positive + false_positive + false_negative)
+        TPs = matches_array[row_m_inds, col_m_inds].sum()
+        FNs = gts_counts.sum() - TPs
+        FPs = hyps_counts.sum() - TPs
+
+        idp = TPs / (TPs + FPs)
+        idr = TPs / (TPs + FNs)
+        idf1 = 2 * TPs / (2 * TPs + FPs + FNs)
 
         return {
-            "IDTP": true_positive,
-            "IDFP": false_positive,
-            "IDFN": false_negative,
+            "IDTP": TPs,
+            "IDFP": FPs,
+            "IDFN": FNs,
             "IDP": idp,
             "IDR": idr,
             "IDF1": idf1,
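The ID-metrics simplification rests on an equivalence: the old code padded `fp`/`fn` matrices to a square shape and minimized `fp + fn - 2 * matching` as a LAP, but under a full one-to-one assignment every row and column is selected exactly once, so the `fp + fn` part of the cost is a constant and minimizing it is the same as maximizing the matched-frame count directly, which is what `linear_sum_assignment(..., maximize=True)` now does. A minimal sketch of the resulting IDF1 computation on toy data (all numbers invented for illustration, not taken from the library):

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

# matches[i, j] = frames in which gt track i was matched to hyp track j
matches = np.array([[8, 1], [0, 5]])
gts_counts = np.array([10, 6])  # total detections per gt track
hyps_counts = np.array([9, 7])  # total detections per hyp track

# One-to-one track assignment maximizing the total matched frames
row_m_inds, col_m_inds = linear_sum_assignment(matches, maximize=True)

TPs = matches[row_m_inds, col_m_inds].sum()  # 8 + 5 = 13
FNs = gts_counts.sum() - TPs                 # 16 - 13 = 3
FPs = hyps_counts.sum() - TPs                # 16 - 13 = 3

idf1 = 2 * TPs / (2 * TPs + FPs + FNs)
print(idf1)  # 26 / 32 = 0.8125
```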
22 changes: 22 additions & 0 deletions src/evaldet/utils/sparse.py
@@ -0,0 +1,22 @@
+import typing as t
+
+import numpy as np
+from scipy import sparse
+
+
+def create_coo_array(
+    vals_dict: t.Dict[t.Tuple[int, int], int], shape: t.Tuple[int, int]
+) -> sparse.coo_array:
+    """Create a sparse COO array.
+
+    Args:
+        vals_dict: A dictionary with values. The key should be a tuple of
+            ``(row_ind, col_ind)``, and the value should be the entry for the cell
+            at that index.
+        shape: Shape of the new array: ``(n_rows, n_cols)``
+    """
+    row_inds = np.array(tuple(x[0] for x in vals_dict.keys()))
+    col_inds = np.array(tuple(x[1] for x in vals_dict.keys()))
+    vals = np.array(tuple(vals_dict.values()))
+
+    return sparse.coo_array((vals, (row_inds, col_inds)), shape=shape)
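
A small usage sketch for the new helper (toy values assumed; the `defaultdict` accumulation mirrors how `hota.py` and `identity.py` build their match counts):

```python
import collections as co
import typing as t

from evaldet.utils.sparse import create_coo_array

# Accumulate per-(gt, hyp) match counts, then densify once at the end
counts: t.Dict[t.Tuple[int, int], int] = co.defaultdict(int)
counts[(0, 1)] += 1
counts[(0, 1)] += 1
counts[(2, 0)] += 1

arr = create_coo_array(counts, (3, 2))
print(arr.toarray())
# [[0 2]
#  [0 0]
#  [1 0]]
```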
