Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add filter_boxes method #332

Merged
merged 8 commits into from
Jul 1, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion doctr/utils/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from .common_types import BoundingBox, Polygon4P, RotatedBbox

__all__ = ['rbbox_to_polygon', 'bbox_to_polygon', 'polygon_to_bbox', 'polygon_to_rbbox',
'resolve_enclosing_bbox', 'resolve_enclosing_bbox', 'fit_rbbox']
'resolve_enclosing_bbox', 'resolve_enclosing_bbox', 'fit_rbbox',
'resolve_enclosing_bboxarray']


def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P:
Expand Down Expand Up @@ -44,3 +45,12 @@ def resolve_enclosing_bbox(bboxes: List[BoundingBox]) -> BoundingBox:
def resolve_enclosing_rbbox(rbboxes: List[RotatedBbox]) -> RotatedBbox:
    """Fit one rotated box around the polygon points of every given rotated box.

    Args:
        rbboxes: rotated boxes to merge

    Returns:
        the rotated box produced by `fit_rbbox` on the pooled points
    """
    # Pool the points of each box's polygon into a single flat list
    points = []
    for rbbox in rbboxes:
        points.extend(rbbox_to_polygon(rbbox))
    pts = np.asarray(points, np.float32)
    return fit_rbbox(pts)


def resolve_enclosing_bboxarray(bboxarray: np.ndarray) -> np.ndarray:
    """Compute the enclosing box of an array of scored straight boxes.

    Args:
        bboxarray: array of shape (N, 5) whose columns are
            (xmin, ymin, xmax, ymax, score)

    Returns:
        a 1D array of shape (5,): (xmin, ymin, xmax, ymax, score), where the
        coordinates enclose every input box and the score is the mean of the
        input scores
    """
    xmin, ymin, xmax, ymax, score = np.split(bboxarray, 5, axis=1)
    # Smallest top-left corner, largest bottom-right corner, averaged confidence
    return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max(), score.mean()])
charlesmindee marked this conversation as resolved.
Show resolved Hide resolved
70 changes: 69 additions & 1 deletion doctr/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from scipy.optimize import linear_sum_assignment
from doctr.utils.geometry import rbbox_to_polygon

__all__ = ['TextMatch', 'box_iou', 'mask_iou', 'rbox_to_mask', 'LocalizationConfusion', 'OCRMetric']
__all__ = ['TextMatch', 'box_iou', 'box_ioa', 'mask_iou', 'rbox_to_mask',
'nms', 'LocalizationConfusion', 'OCRMetric']


def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
Expand Down Expand Up @@ -143,6 +144,35 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
return iou_mat


def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
    """Compute the IoA (intersection over area) between two sets of bounding boxes:
    ioa(i, j) = inter(i, j) / area(i)

    Boxes of `boxes_1` whose area is not strictly positive get an IoA of 0
    instead of triggering a division by zero.

    Args:
        boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
        boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
    Returns:
        the IoA matrix of shape (N, M)
    """

    ioa_mat = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)

    if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
        # Column vectors of shape (N, 1) and (M, 1)
        l1, t1, r1, b1 = np.split(boxes_1, 4, axis=1)
        l2, t2, r2, b2 = np.split(boxes_2, 4, axis=1)

        # Broadcasting (N, 1) against (1, M) yields the pairwise (N, M) intersection box
        left = np.maximum(l1, l2.T)
        top = np.maximum(t1, t2.T)
        right = np.minimum(r1, r2.T)
        bot = np.minimum(b1, b2.T)

        # Clip negative extents (disjoint boxes) to 0; `None` means no upper bound
        # (avoids `np.Inf`, which no longer exists in NumPy 2.0)
        intersection = np.clip(right - left, 0, None) * np.clip(bot - top, 0, None)
        area = (r1 - l1) * (b1 - t1)

        # Guard against degenerate boxes: divide by a dummy 1 and force the result to 0
        valid = area > 0
        ioa_mat = np.where(valid, intersection / np.where(valid, area, 1), 0)

    return ioa_mat


def mask_iou(masks_1: np.ndarray, masks_2: np.ndarray) -> np.ndarray:
"""Compute the IoU between two sets of boolean masks

Expand Down Expand Up @@ -200,6 +230,44 @@ def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
return masks.astype(bool)


def nms(boxes: np.ndarray, thresh: float = .5) -> List[int]:
    """Perform non-max suppression, borrowed from
    `Fast R-CNN <https://github.com/rbgirshick/fast-rcnn>`_.

    Boxes are visited by decreasing score; each visited box is kept and every
    remaining box whose IoU with it is strictly greater than `thresh` is discarded.
    When the union of two boxes has no area (both are degenerate), their overlap
    is treated as 0.

    Args:
        boxes: np array of straight boxes: (*, 5), (xmin, ymin, xmax, ymax, score)
        thresh: iou threshold to perform box suppression.

    Returns:
        A list of box indexes to keep, as Python ints, ordered by decreasing score
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    areas = (x2 - x1) * (y2 - y1)
    # Indexes sorted from highest to lowest score
    order = scores.argsort()[::-1]

    keep: List[int] = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        # Cast so that callers get plain ints, as the return annotation promises
        keep.append(int(i))

        # Intersection of the current best box with every remaining candidate
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        union = areas[i] + areas[rest] - inter

        # Only divide where the union is positive; elsewhere the overlap stays at 0
        ovr = np.divide(inter, union, out=np.zeros_like(inter), where=union > 0)

        # Continue with the candidates that do not overlap the kept box too much
        order = rest[ovr <= thresh]
    return keep


class LocalizationConfusion:
"""Implements common confusion metrics and mean IoU for localization evaluation.

Expand Down
5 changes: 5 additions & 0 deletions test/test_utils_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,8 @@ def test_resolve_enclosing_rbbox():
pred = geometry.resolve_enclosing_rbbox([(.2, .2, .05, .05, 0), (.2, .2, .2, .2, 0)])[:4]
target = (.2, .2, .2, .2)
assert all(abs(i - j) <= 1e-7 for (i, j) in zip(pred, target))


def test_resolve_bboxarray():
    boxes = np.array([[0.1, 0.1, 0.2, 0.2, 0.9], [0.15, 0.15, 0.2, 0.2, 0.8]])
    pred = geometry.resolve_enclosing_bboxarray(boxes)
    # Compare element-wise: `a.all() == b.all()` only compares two booleans
    # and would pass for any prediction without zeros
    assert pred.shape == (5,)
    assert np.allclose(pred, [0.1, 0.1, 0.2, 0.2, 0.85])
charlesmindee marked this conversation as resolved.
Show resolved Hide resolved
21 changes: 21 additions & 0 deletions test/test_utils_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,24 @@ def test_ocr_metric(
metric.reset()
assert metric.num_gts == metric.num_preds == metric.tot_iou == 0
assert metric.raw_matches == metric.caseless_matches == metric.unidecode_matches == metric.unicase_matches == 0


def test_nms():
    # Two clusters of overlapping boxes: only the best-scored box of each should remain
    candidates = np.asarray([
        [0.1, 0.1, 0.2, 0.2, 0.95],
        [0.15, 0.15, 0.19, 0.2, 0.90],  # to suppress
        [0.5, 0.5, 0.6, 0.55, 0.90],
        [0.55, 0.5, 0.7, 0.55, 0.85],  # to suppress
    ])
    kept = metrics.nms(candidates, thresh=0.2)
    assert kept == [0, 2]


def test_box_ioa():
    # The second box lies entirely inside the first one
    coords = [
        [0.1, 0.1, 0.2, 0.2],
        [0.15, 0.15, 0.2, 0.2],
    ]
    ioa = metrics.box_ioa(np.array(coords), np.array(coords))
    # Each of these boxes is fully covered by the box it is compared with
    for row, col in ((1, 0), (0, 0), (1, 1)):
        assert ioa[row, col] == 1.
    # The inner box covers a quarter of the outer box
    assert abs(ioa[0, 1] - .25) <= 1e-7
assert abs(mat[0, 1] - .25) <= 1e-7