From c5bc94dff5e31d19c2ae8d9880dd5488ee125940 Mon Sep 17 00:00:00 2001 From: sailfish009 Date: Thu, 16 Jul 2020 13:48:30 +0900 Subject: [PATCH 1/2] fix AT_CHECK --- mmdet/ops/carafe/src/carafe_cuda.cpp | 4 +- mmdet/ops/carafe/src/carafe_naive_cuda.cpp | 4 +- mmdet/ops/dcn/src/deform_conv_cuda.cpp | 42 +++++++++---------- mmdet/ops/dcn/src/deform_pool_cuda.cpp | 6 +-- .../masked_conv/src/masked_conv2d_cuda.cpp | 4 +- mmdet/ops/nms/src/nms_cuda.cpp | 2 +- mmdet/ops/roi_align/src/roi_align_cuda.cpp | 4 +- mmdet/ops/roi_pool/src/roi_pool_cuda.cpp | 4 +- 8 files changed, 35 insertions(+), 35 deletions(-) diff --git a/mmdet/ops/carafe/src/carafe_cuda.cpp b/mmdet/ops/carafe/src/carafe_cuda.cpp index 9a7c73af..97a99419 100644 --- a/mmdet/ops/carafe/src/carafe_cuda.cpp +++ b/mmdet/ops/carafe/src/carafe_cuda.cpp @@ -24,9 +24,9 @@ int CARAFEBackwardLaucher(const at::Tensor top_grad, const at::Tensor rfeatures, at::Tensor rmask_grad, at::Tensor bottom_grad, at::Tensor mask_grad); -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") + TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/carafe/src/carafe_naive_cuda.cpp b/mmdet/ops/carafe/src/carafe_naive_cuda.cpp index fbcda80e..71af5388 100644 --- a/mmdet/ops/carafe/src/carafe_naive_cuda.cpp +++ b/mmdet/ops/carafe/src/carafe_naive_cuda.cpp @@ -18,9 +18,9 @@ int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad, const int height, const int width, at::Tensor bottom_grad, at::Tensor mask_grad); -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_CHECK(x.is_contiguous(), #x, " must be 
contiguous ") + TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/dcn/src/deform_conv_cuda.cpp b/mmdet/ops/dcn/src/deform_conv_cuda.cpp index 2321e023..8ef0d231 100644 --- a/mmdet/ops/dcn/src/deform_conv_cuda.cpp +++ b/mmdet/ops/dcn/src/deform_conv_cuda.cpp @@ -63,26 +63,26 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, at::Tensor weight, int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW, int group, int deformable_group) { - AT_CHECK(weight.ndimension() == 4, + TORCH_CHECK(weight.ndimension() == 4, "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " "but got: %s", weight.ndimension()); - AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - AT_CHECK(kW > 0 && kH > 0, + TORCH_CHECK(kW > 0 && kH > 0, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); - AT_CHECK((weight.size(2) == kH && weight.size(3) == kW), + TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), "kernel size should be consistent with weight, ", "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, kW, weight.size(2), weight.size(3)); - AT_CHECK(dW > 0 && dH > 0, + TORCH_CHECK(dW > 0 && dH > 0, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); - AT_CHECK( + TORCH_CHECK( dilationW > 0 && dilationH > 0, "dilation should be greater than 0, but got dilationH: %d dilationW: %d", dilationH, dilationW); @@ -98,7 +98,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, dimw++; } - AT_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", + TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", ndim); long nInputPlane = weight.size(1) * group; @@ -110,7 +110,7 @@ void shape_check(at::Tensor input, at::Tensor 
offset, at::Tensor *gradOutput, long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - AT_CHECK(nInputPlane % deformable_group == 0, + TORCH_CHECK(nInputPlane % deformable_group == 0, "input channels must divide deformable group size"); if (outputWidth < 1 || outputHeight < 1) @@ -120,27 +120,27 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth); - AT_CHECK(input.size(1) == nInputPlane, + TORCH_CHECK(input.size(1) == nInputPlane, "invalid number of input planes, expected: %d, but got: %d", nInputPlane, input.size(1)); - AT_CHECK((inputHeight >= kH && inputWidth >= kW), + TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), "input image is smaller than kernel"); - AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), + TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), "invalid spatial size of offset, expected height: %d width: %d, but " "got height: %d width: %d", outputHeight, outputWidth, offset.size(2), offset.size(3)); - AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), + TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), "invalid number of channels of offset"); if (gradOutput != NULL) { - AT_CHECK(gradOutput->size(dimf) == nOutputPlane, + TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, "invalid number of gradOutput planes, expected: %d, but got: %d", nOutputPlane, gradOutput->size(dimf)); - AT_CHECK((gradOutput->size(dimh) == outputHeight && + TORCH_CHECK((gradOutput->size(dimh) == outputHeight && gradOutput->size(dimw) == outputWidth), "invalid size of gradOutput, expected height: %d width: %d , but " "got height: %d width: %d", @@ -191,7 +191,7 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - AT_CHECK((offset.size(0) == batchSize), "invalid 
batch size of offset"); + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); @@ -298,7 +298,7 @@ int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - AT_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, @@ -414,7 +414,7 @@ int deform_conv_backward_parameters_cuda( long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, @@ -494,8 +494,8 @@ void modulated_deform_conv_cuda_forward( const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias) { - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); @@ -576,8 +576,8 @@ void modulated_deform_conv_cuda_backward( int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, const bool with_bias) { - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - 
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); diff --git a/mmdet/ops/dcn/src/deform_pool_cuda.cpp b/mmdet/ops/dcn/src/deform_pool_cuda.cpp index 9e0e3ffc..f6f087b8 100644 --- a/mmdet/ops/dcn/src/deform_pool_cuda.cpp +++ b/mmdet/ops/dcn/src/deform_pool_cuda.cpp @@ -33,7 +33,7 @@ void deform_psroi_pooling_cuda_forward( at::Tensor top_count, const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); @@ -59,8 +59,8 @@ void deform_psroi_pooling_cuda_backward( const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { - AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); diff --git a/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp b/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp index 6c2a8f6a..8bc5d505 100644 --- a/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp +++ b/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp @@ -17,9 +17,9 @@ int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, const at::Tensor mask_w_idx, const 
int mask_cnt, at::Tensor im); -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") + TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/nms/src/nms_cuda.cpp b/mmdet/ops/nms/src/nms_cuda.cpp index 274c7248..f53359de 100644 --- a/mmdet/ops/nms/src/nms_cuda.cpp +++ b/mmdet/ops/nms/src/nms_cuda.cpp @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. #include -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); diff --git a/mmdet/ops/roi_align/src/roi_align_cuda.cpp b/mmdet/ops/roi_align/src/roi_align_cuda.cpp index 268f6907..6e6cc76f 100644 --- a/mmdet/ops/roi_align/src/roi_align_cuda.cpp +++ b/mmdet/ops/roi_align/src/roi_align_cuda.cpp @@ -34,9 +34,9 @@ at::Tensor ROIAlignBackwardV2Laucher( const int sampling_ratio, bool aligned); #endif -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") + TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp index 87e39be8..7ba60a2a 100644 --- a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp +++ b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp @@ -16,9 +16,9 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, const int num_rois, const int pooled_h, 
const int pooled_w, at::Tensor bottom_grad); -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") + TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) From 69eb463cfff20de8b013a4330e53e6a36607373c Mon Sep 17 00:00:00 2001 From: sailfish009 Date: Sun, 2 Aug 2020 11:16:11 +0900 Subject: [PATCH 2/2] update --- mmdet/core/bbox/__init__.py | 38 +- mmdet/core/bbox/assigners/__init__.py | 3 +- .../bbox/assigners/approx_max_iou_assigner.py | 46 +-- mmdet/core/bbox/assigners/assign_result.py | 64 ++-- mmdet/core/bbox/assigners/atss_assigner.py | 35 +- mmdet/core/bbox/assigners/base_assigner.py | 2 + .../bbox/assigners/center_region_assigner.py | 335 ++++++++++++++++++ mmdet/core/bbox/assigners/max_iou_assigner.py | 69 ++-- mmdet/core/bbox/assigners/point_assigner.py | 19 +- mmdet/core/bbox/builder.py | 20 ++ mmdet/core/bbox/coder/__init__.py | 10 + mmdet/core/bbox/coder/base_bbox_coder.py | 19 + .../core/bbox/coder/delta_xywh_bbox_coder.py | 197 ++++++++++ .../coder/legacy_delta_xywh_bbox_coder.py | 212 +++++++++++ mmdet/core/bbox/coder/pseudo_bbox_coder.py | 18 + mmdet/core/bbox/coder/tblr_bbox_coder.py | 165 +++++++++ mmdet/core/bbox/demodata.py | 6 +- mmdet/core/bbox/iou_calculators/__init__.py | 4 + mmdet/core/bbox/iou_calculators/builder.py | 8 + .../bbox/iou_calculators/iou2d_calculator.py | 130 +++++++ mmdet/core/bbox/samplers/__init__.py | 3 +- mmdet/core/bbox/samplers/base_sampler.py | 3 + mmdet/core/bbox/samplers/combined_sampler.py | 6 +- .../samplers/instance_balanced_pos_sampler.py | 18 +- .../bbox/samplers/iou_balanced_neg_sampler.py | 26 +- mmdet/core/bbox/samplers/ohem_sampler.py | 43 ++- mmdet/core/bbox/samplers/pseudo_sampler.py | 19 +- mmdet/core/bbox/samplers/random_sampler.py | 16 +- 
mmdet/core/bbox/samplers/sampling_result.py | 42 ++- mmdet/core/bbox/samplers/score_hlr_sampler.py | 261 ++++++++++++++ mmdet/core/bbox/transforms.py | 112 ++++-- mmdet/datasets/pipelines/transforms.py | 2 +- mmdet/models/bbox_heads/double_bbox_head.py | 2 +- mmdet/models/losses/__init__.py | 4 +- mmdet/models/losses/iou_loss.py | 139 ++++++-- mmdet/models/necks/hrfpn.py | 2 +- mmdet/ops/conv_ws.py | 2 +- mmdet/ops/nms/nms_wrapper.py | 3 +- requirements/build.txt | 2 +- requirements/runtime.txt | 4 +- 40 files changed, 1898 insertions(+), 211 deletions(-) create mode 100644 mmdet/core/bbox/assigners/center_region_assigner.py create mode 100644 mmdet/core/bbox/builder.py create mode 100644 mmdet/core/bbox/coder/__init__.py create mode 100644 mmdet/core/bbox/coder/base_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/delta_xywh_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/pseudo_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/tblr_bbox_coder.py create mode 100644 mmdet/core/bbox/iou_calculators/__init__.py create mode 100644 mmdet/core/bbox/iou_calculators/builder.py create mode 100644 mmdet/core/bbox/iou_calculators/iou2d_calculator.py create mode 100644 mmdet/core/bbox/samplers/score_hlr_sampler.py diff --git a/mmdet/core/bbox/__init__.py b/mmdet/core/bbox/__init__.py index a0de9172..b16c5dab 100644 --- a/mmdet/core/bbox/__init__.py +++ b/mmdet/core/bbox/__init__.py @@ -1,22 +1,30 @@ -from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner -from .bbox_target import bbox_target -from .geometry import bbox_overlaps +from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner, + MaxIoUAssigner) +from .builder import build_assigner, build_bbox_coder, build_sampler +from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder, + TBLRBBoxCoder) +from .iou_calculators import BboxOverlaps2D, bbox_overlaps from .samplers import (BaseSampler, 
CombinedSampler, InstanceBalancedPosSampler, IoUBalancedNegSampler, - PseudoSampler, RandomSampler, SamplingResult) -from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip, - bbox_mapping, bbox_mapping_back, delta2bbox, - distance2bbox, roi2bbox) + OHEMSampler, PseudoSampler, RandomSampler, + SamplingResult, ScoreHLRSampler) +from .transforms import (bbox2distance, bbox2result, bbox2roi, bbox_flip, + bbox_mapping, bbox_mapping_back, distance2bbox, + roi2bbox, bbox2delta, delta2bbox) + + +from .bbox_target import bbox_target +from .assign_sampling import assign_and_sample -from .assign_sampling import ( # isort:skip, avoid recursive imports - assign_and_sample, build_assigner, build_sampler) __all__ = [ - 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', - 'BaseSampler', 'PseudoSampler', 'RandomSampler', + 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', + 'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', - 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', - 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', - 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', - 'distance2bbox', 'bbox_target' + 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner', + 'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', + 'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance', + 'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder', + 'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner', + 'assign_and_sample', 'bbox2delta', 'delta2bbox', 'bbox_target' ] diff --git a/mmdet/core/bbox/assigners/__init__.py b/mmdet/core/bbox/assigners/__init__.py index 4ed1d564..f72306b8 100644 --- a/mmdet/core/bbox/assigners/__init__.py +++ b/mmdet/core/bbox/assigners/__init__.py @@ -2,10 +2,11 @@ from .assign_result import AssignResult from .atss_assigner import 
ATSSAssigner from .base_assigner import BaseAssigner +from .center_region_assigner import CenterRegionAssigner from .max_iou_assigner import MaxIoUAssigner from .point_assigner import PointAssigner __all__ = [ 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', - 'PointAssigner', 'ATSSAssigner' + 'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner' ] diff --git a/mmdet/core/bbox/assigners/approx_max_iou_assigner.py b/mmdet/core/bbox/assigners/approx_max_iou_assigner.py index f157a6b4..6d07656d 100644 --- a/mmdet/core/bbox/assigners/approx_max_iou_assigner.py +++ b/mmdet/core/bbox/assigners/approx_max_iou_assigner.py @@ -1,18 +1,19 @@ import torch -from ..geometry import bbox_overlaps +from ..builder import BBOX_ASSIGNERS +from ..iou_calculators import build_iou_calculator from .max_iou_assigner import MaxIoUAssigner +@BBOX_ASSIGNERS.register_module() class ApproxMaxIoUAssigner(MaxIoUAssigner): """Assign a corresponding gt bbox or background to each bbox. - Each proposals will be assigned with `-1`, `0`, or a positive integer - indicating the ground truth index. + Each proposals will be assigned with an integer indicating the ground truth + index. (semi-positive index: gt label (0-based), -1: background) - - -1: don't care - - 0: negative sample, no assigned gt - - positive integer: positive sample, index (1-based) of assigned gt + - -1: negative sample, no assigned gt + - semi-positive integer: positive sample, index (0-based) of assigned gt Args: pos_iou_thr (float): IoU threshold for positive bboxes. @@ -27,6 +28,9 @@ class ApproxMaxIoUAssigner(MaxIoUAssigner): ignoring any bboxes. ignore_wrt_candidates (bool): Whether to compute the iof between `bboxes` and `gt_bboxes_ignore`, or the contrary. + match_low_quality (bool): Whether to allow quality matches. This is + usually allowed for RPN and single stage detectors, but not allowed + in the second stage. gpu_assign_thr (int): The upper bound of the number of GT for GPU assign. 
When the number of gt is above this threshold, will assign on CPU device. Negative values mean not assign on CPU. @@ -39,7 +43,9 @@ def __init__(self, gt_max_assign_all=True, ignore_iof_thr=-1, ignore_wrt_candidates=True, - gpu_assign_thr=-1): + match_low_quality=True, + gpu_assign_thr=-1, + iou_calculator=dict(type='BboxOverlaps2D')): self.pos_iou_thr = pos_iou_thr self.neg_iou_thr = neg_iou_thr self.min_pos_iou = min_pos_iou @@ -47,6 +53,8 @@ def __init__(self, self.ignore_iof_thr = ignore_iof_thr self.ignore_wrt_candidates = ignore_wrt_candidates self.gpu_assign_thr = gpu_assign_thr + self.match_low_quality = match_low_quality + self.iou_calculator = build_iou_calculator(iou_calculator) def assign(self, approxs, @@ -59,14 +67,14 @@ def assign(self, This method assign a gt bbox to each group of approxs (bboxes), each group of approxs is represent by a base approx (bbox) and - will be assigned with -1, 0, or a positive number. - -1 means don't care, 0 means negative sample, - positive number is the index (1-based) of assigned gt. + will be assigned with -1, or a semi-positive number. + background_label (-1) means negative sample, + semi-positive number is the index (0-based) of assigned gt. The assignment is done in following steps, the order matters. - 1. assign every bbox to -1 + 1. assign every bbox to background_label (-1) 2. use the max IoU of each group of approxs to assign - 2. assign proposals whose iou with all gts < neg_iou_thr to 0 + 2. assign proposals whose iou with all gts < neg_iou_thr to background 3. for each bbox, if the iou with its nearest gt >= pos_iou_thr, assign it to that bbox 4. 
for each gt bbox, assign its nearest proposals (may be more than @@ -110,23 +118,21 @@ def assign(self, gt_bboxes_ignore = gt_bboxes_ignore.cpu() if gt_labels is not None: gt_labels = gt_labels.cpu() - all_overlaps = bbox_overlaps(approxs, gt_bboxes) + all_overlaps = self.iou_calculator(approxs, gt_bboxes) overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares, num_gts).max(dim=0) overlaps = torch.transpose(overlaps, 0, 1) - bboxes = squares[:, :4] - if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None - and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0): + and gt_bboxes_ignore.numel() > 0 and squares.numel() > 0): if self.ignore_wrt_candidates: - ignore_overlaps = bbox_overlaps( - bboxes, gt_bboxes_ignore, mode='iof') + ignore_overlaps = self.iou_calculator( + squares, gt_bboxes_ignore, mode='iof') ignore_max_overlaps, _ = ignore_overlaps.max(dim=1) else: - ignore_overlaps = bbox_overlaps( - gt_bboxes_ignore, bboxes, mode='iof') + ignore_overlaps = self.iou_calculator( + gt_bboxes_ignore, squares, mode='iof') ignore_max_overlaps, _ = ignore_overlaps.max(dim=0) overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1 diff --git a/mmdet/core/bbox/assigners/assign_result.py b/mmdet/core/bbox/assigners/assign_result.py index 5e81c897..4639fbdb 100644 --- a/mmdet/core/bbox/assigners/assign_result.py +++ b/mmdet/core/bbox/assigners/assign_result.py @@ -4,8 +4,7 @@ class AssignResult(util_mixins.NiceRepr): - """ - Stores assignments between predicted and truth boxes. + """Stores assignments between predicted and truth boxes. 
Attributes: num_gts (int): the number of truth boxes considered when computing this @@ -45,55 +44,60 @@ def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): self.gt_inds = gt_inds self.max_overlaps = max_overlaps self.labels = labels + # Interface for possible user-defined properties + self._extra_properties = {} @property def num_preds(self): - """ - Return the number of predictions in this assignment - """ + """int: the number of predictions in this assignment""" return len(self.gt_inds) + def set_extra_property(self, key, value): + """Set user-defined new property.""" + assert key not in self.info + self._extra_properties[key] = value + + def get_extra_property(self, key): + """Get user-defined property.""" + return self._extra_properties.get(key, None) + @property def info(self): - """ - Returns a dictionary of info about the object - """ - return { + """dict: a dictionary of info about the object""" + basic_info = { 'num_gts': self.num_gts, 'num_preds': self.num_preds, 'gt_inds': self.gt_inds, 'max_overlaps': self.max_overlaps, 'labels': self.labels, } + basic_info.update(self._extra_properties) + return basic_info def __nice__(self): - """ - Create a "nice" summary string describing this assign result - """ + """str: a "nice" summary string describing this assign result""" parts = [] - parts.append('num_gts={!r}'.format(self.num_gts)) + parts.append(f'num_gts={self.num_gts!r}') if self.gt_inds is None: - parts.append('gt_inds={!r}'.format(self.gt_inds)) + parts.append(f'gt_inds={self.gt_inds!r}') else: - parts.append('gt_inds.shape={!r}'.format( - tuple(self.gt_inds.shape))) + parts.append(f'gt_inds.shape={tuple(self.gt_inds.shape)!r}') if self.max_overlaps is None: - parts.append('max_overlaps={!r}'.format(self.max_overlaps)) + parts.append(f'max_overlaps={self.max_overlaps!r}') else: - parts.append('max_overlaps.shape={!r}'.format( - tuple(self.max_overlaps.shape))) + parts.append('max_overlaps.shape=' + f'{tuple(self.max_overlaps.shape)!r}') if 
self.labels is None: - parts.append('labels={!r}'.format(self.labels)) + parts.append(f'labels={self.labels!r}') else: - parts.append('labels.shape={!r}'.format(tuple(self.labels.shape))) + parts.append(f'labels.shape={tuple(self.labels.shape)!r}') return ', '.join(parts) @classmethod def random(cls, **kwargs): - """ - Create random AssignResult for tests or debugging. + """Create random AssignResult for tests or debugging. - Kwargs: + Args: num_preds: number of predicted boxes num_gts: number of true boxes p_ignore (float): probability of a predicted box assinged to an @@ -104,7 +108,7 @@ def random(cls, **kwargs): rng (None | int | numpy.random.RandomState): seed or state Returns: - AssignResult : + :obj:`AssignResult`: Randomly generated assign results. Example: >>> from mmdet.core.bbox.assigners.assign_result import * # NOQA @@ -172,7 +176,10 @@ def random(cls, **kwargs): labels = torch.zeros(num_preds, dtype=torch.int64) else: labels = torch.from_numpy( - rng.randint(1, num_classes + 1, size=num_preds)) + # remind that we set FG labels to [0, num_class-1] + # since mmdet v2.0 + # BG cat_id: num_class + rng.randint(0, num_classes, size=num_preds)) labels[~is_assigned] = 0 else: labels = None @@ -181,6 +188,11 @@ def random(cls, **kwargs): return self def add_gt_(self, gt_labels): + """Add ground truth as assigned results. 
+ + Args: + gt_labels (torch.Tensor): Labels of gt boxes + """ self_inds = torch.arange( 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) self.gt_inds = torch.cat([self_inds, self.gt_inds]) diff --git a/mmdet/core/bbox/assigners/atss_assigner.py b/mmdet/core/bbox/assigners/atss_assigner.py index e442ac70..8e21726b 100644 --- a/mmdet/core/bbox/assigners/atss_assigner.py +++ b/mmdet/core/bbox/assigners/atss_assigner.py @@ -1,10 +1,12 @@ import torch -from ..geometry import bbox_overlaps +from ..builder import BBOX_ASSIGNERS +from ..iou_calculators import build_iou_calculator from .assign_result import AssignResult from .base_assigner import BaseAssigner +@BBOX_ASSIGNERS.register_module() class ATSSAssigner(BaseAssigner): """Assign a corresponding gt bbox or background to each bbox. @@ -18,8 +20,13 @@ class ATSSAssigner(BaseAssigner): topk (float): number of bbox selected in each level """ - def __init__(self, topk): + def __init__(self, + topk, + iou_calculator=dict(type='BboxOverlaps2D'), + ignore_iof_thr=-1): self.topk = topk + self.iou_calculator = build_iou_calculator(iou_calculator) + self.ignore_iof_thr = ignore_iof_thr # https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py @@ -61,7 +68,7 @@ def assign(self, num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0) # compute iou between all bbox and gt - overlaps = bbox_overlaps(bboxes, gt_bboxes) + overlaps = self.iou_calculator(bboxes, gt_bboxes) # assign 0 by default assigned_gt_inds = overlaps.new_full((num_bboxes, ), @@ -77,8 +84,9 @@ def assign(self, if gt_labels is None: assigned_labels = None else: - assigned_labels = overlaps.new_zeros((num_bboxes, ), - dtype=torch.long) + assigned_labels = overlaps.new_full((num_bboxes, ), + -1, + dtype=torch.long) return AssignResult( num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels) @@ -94,6 +102,15 @@ def assign(self, distances = (bboxes_points[:, None, :] - gt_points[None, :, :]).pow(2).sum(-1).sqrt() + if 
(self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None + and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0): + ignore_overlaps = self.iou_calculator( + bboxes, gt_bboxes_ignore, mode='iof') + ignore_max_overlaps, _ = ignore_overlaps.max(dim=1) + ignore_idxs = ignore_max_overlaps > self.ignore_iof_thr + distances[ignore_idxs, :] = INF + assigned_gt_inds[ignore_idxs] = -1 + # Selecting candidates based on the center distance candidate_idxs = [] start_idx = 0 @@ -102,8 +119,9 @@ def assign(self, # select k bbox whose center are closest to the gt center end_idx = start_idx + bboxes_per_level distances_per_level = distances[start_idx:end_idx, :] + selectable_k = min(self.topk, bboxes_per_level) _, topk_idxs_per_level = distances_per_level.topk( - self.topk, dim=0, largest=False) + selectable_k, dim=0, largest=False) candidate_idxs.append(topk_idxs_per_level + start_idx) start_idx = end_idx candidate_idxs = torch.cat(candidate_idxs, dim=0) @@ -148,8 +166,9 @@ def assign(self, max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1 if gt_labels is not None: - assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, )) - pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze() + assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1) + pos_inds = torch.nonzero( + assigned_gt_inds > 0, as_tuple=False).squeeze() if pos_inds.numel() > 0: assigned_labels[pos_inds] = gt_labels[ assigned_gt_inds[pos_inds] - 1] diff --git a/mmdet/core/bbox/assigners/base_assigner.py b/mmdet/core/bbox/assigners/base_assigner.py index 7bd02dce..2da9e0f4 100644 --- a/mmdet/core/bbox/assigners/base_assigner.py +++ b/mmdet/core/bbox/assigners/base_assigner.py @@ -2,7 +2,9 @@ class BaseAssigner(metaclass=ABCMeta): + """Base assigner that assigns boxes to ground truth boxes.""" @abstractmethod def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): + """Assign boxes to either a ground truth boxe or a negative boxes.""" pass diff --git 
a/mmdet/core/bbox/assigners/center_region_assigner.py b/mmdet/core/bbox/assigners/center_region_assigner.py new file mode 100644 index 00000000..488e3b61 --- /dev/null +++ b/mmdet/core/bbox/assigners/center_region_assigner.py @@ -0,0 +1,335 @@ +import torch + +from ..builder import BBOX_ASSIGNERS +from ..iou_calculators import build_iou_calculator +from .assign_result import AssignResult +from .base_assigner import BaseAssigner + + +def scale_boxes(bboxes, scale): + """Expand an array of boxes by a given scale. + + Args: + bboxes (Tensor): Shape (m, 4) + scale (float): The scale factor of bboxes + + Returns: + (Tensor): Shape (m, 4). Scaled bboxes + """ + assert bboxes.size(1) == 4 + w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5 + h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5 + x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5 + y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5 + + w_half *= scale + h_half *= scale + + boxes_scaled = torch.zeros_like(bboxes) + boxes_scaled[:, 0] = x_c - w_half + boxes_scaled[:, 2] = x_c + w_half + boxes_scaled[:, 1] = y_c - h_half + boxes_scaled[:, 3] = y_c + h_half + return boxes_scaled + + +def is_located_in(points, bboxes): + """Are points located in bboxes. + + Args: + points (Tensor): Points, shape: (m, 2). + bboxes (Tensor): Bounding boxes, shape: (n, 4). + + Return: + Tensor: Flags indicating if points are located in bboxes, shape: (m, n). + """ + assert points.size(1) == 2 + assert bboxes.size(1) == 4 + return (points[:, 0].unsqueeze(1) > bboxes[:, 0].unsqueeze(0)) & \ + (points[:, 0].unsqueeze(1) < bboxes[:, 2].unsqueeze(0)) & \ + (points[:, 1].unsqueeze(1) > bboxes[:, 1].unsqueeze(0)) & \ + (points[:, 1].unsqueeze(1) < bboxes[:, 3].unsqueeze(0)) + + +def bboxes_area(bboxes): + """Compute the area of an array of bboxes. + + Args: + bboxes (Tensor): The coordinates ox bboxes. Shape: (m, 4) + + Returns: + Tensor: Area of the bboxes. 
Shape: (m, ) + """ + assert bboxes.size(1) == 4 + w = (bboxes[:, 2] - bboxes[:, 0]) + h = (bboxes[:, 3] - bboxes[:, 1]) + areas = w * h + return areas + + +@BBOX_ASSIGNERS.register_module() +class CenterRegionAssigner(BaseAssigner): + """Assign pixels at the center region of a bbox as positive. + + Each proposals will be assigned with `-1`, `0`, or a positive integer + indicating the ground truth index. + - -1: negative samples + - semi-positive numbers: positive sample, index (0-based) of assigned gt + + Args: + pos_scale (float): Threshold within which pixels are + labelled as positive. + neg_scale (float): Threshold above which pixels are + labelled as positive. + min_pos_iof (float): Minimum iof of a pixel with a gt to be + labelled as positive. Default: 1e-2 + ignore_gt_scale (float): Threshold within which the pixels + are ignored when the gt is labelled as shadowed. Default: 0.5 + foreground_dominate (bool): If True, the bbox will be assigned as + positive when a gt's kernel region overlaps with another's shadowed + (ignored) region, otherwise it is set as ignored. Default to False. + """ + + def __init__(self, + pos_scale, + neg_scale, + min_pos_iof=1e-2, + ignore_gt_scale=0.5, + foreground_dominate=False, + iou_calculator=dict(type='BboxOverlaps2D')): + self.pos_scale = pos_scale + self.neg_scale = neg_scale + self.min_pos_iof = min_pos_iof + self.ignore_gt_scale = ignore_gt_scale + self.foreground_dominate = foreground_dominate + self.iou_calculator = build_iou_calculator(iou_calculator) + + def get_gt_priorities(self, gt_bboxes): + """Get gt priorities according to their areas. + + Smaller gt has higher priority. + + Args: + gt_bboxes (Tensor): Ground truth boxes, shape (k, 4). + + Returns: + Tensor: The priority of gts so that gts with larger priority is \ + more likely to be assigned. Shape (k, ) + """ + gt_areas = bboxes_area(gt_bboxes) + # Rank all gt bbox areas. 
Smaller objects has larger priority + _, sort_idx = gt_areas.sort(descending=True) + sort_idx = sort_idx.argsort() + return sort_idx + + def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): + """Assign gt to bboxes. + + This method assigns gts to every bbox (proposal/anchor), each bbox \ + will be assigned with -1, or a semi-positive number. -1 means \ + negative sample, semi-positive number is the index (0-based) of \ + assigned gt. + + Args: + bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4). + gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4). + gt_bboxes_ignore (tensor, optional): Ground truth bboxes that are + labelled as `ignored`, e.g., crowd boxes in COCO. + gt_labels (tensor, optional): Label of gt_bboxes, shape (num_gts,). + + Returns: + :obj:`AssignResult`: The assigned result. Note that \ + shadowed_labels of shape (N, 2) is also added as an \ + `assign_result` attribute. `shadowed_labels` is a tensor \ + composed of N pairs of anchor_ind, class_label], where N \ + is the number of anchors that lie in the outer region of a \ + gt, anchor_ind is the shadowed anchor index and class_label \ + is the shadowed class label. + + Example: + >>> self = CenterRegionAssigner(0.2, 0.2) + >>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]]) + >>> gt_bboxes = torch.Tensor([[0, 0, 10, 10]]) + >>> assign_result = self.assign(bboxes, gt_bboxes) + >>> expected_gt_inds = torch.LongTensor([1, 0]) + >>> assert torch.all(assign_result.gt_inds == expected_gt_inds) + """ + # There are in total 5 steps in the pixel assignment + # 1. Find core (the center region, say inner 0.2) + # and shadow (the relatively ourter part, say inner 0.2-0.5) + # regions of every gt. + # 2. Find all prior bboxes that lie in gt_core and gt_shadow regions + # 3. Assign prior bboxes in gt_core with a one-hot id of the gt in + # the image. + # 3.1. For overlapping objects, the prior bboxes in gt_core is + # assigned with the object with smallest area + # 4. 
Assign prior bboxes with class label according to its gt id. + # 4.1. Assign -1 to prior bboxes lying in shadowed gts + # 4.2. Assign positive prior boxes with the corresponding label + # 5. Find pixels lying in the shadow of an object and assign them with + # background label, but set the loss weight of its corresponding + # gt to zero. + assert bboxes.size(1) == 4, 'bboxes must have size of 4' + # 1. Find core positive and shadow region of every gt + gt_core = scale_boxes(gt_bboxes, self.pos_scale) + gt_shadow = scale_boxes(gt_bboxes, self.neg_scale) + + # 2. Find prior bboxes that lie in gt_core and gt_shadow regions + bbox_centers = (bboxes[:, 2:4] + bboxes[:, 0:2]) / 2 + # The center points lie within the gt boxes + is_bbox_in_gt = is_located_in(bbox_centers, gt_bboxes) + # Only calculate bbox and gt_core IoF. This enables small prior bboxes + # to match large gts + bbox_and_gt_core_overlaps = self.iou_calculator( + bboxes, gt_core, mode='iof') + # The center point of effective priors should be within the gt box + is_bbox_in_gt_core = is_bbox_in_gt & ( + bbox_and_gt_core_overlaps > self.min_pos_iof) # shape (n, k) + + is_bbox_in_gt_shadow = ( + self.iou_calculator(bboxes, gt_shadow, mode='iof') > + self.min_pos_iof) + # Rule out center effective positive pixels + is_bbox_in_gt_shadow &= (~is_bbox_in_gt_core) + + num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0) + if num_gts == 0 or num_bboxes == 0: + # If no gts exist, assign all pixels to negative + assigned_gt_ids = \ + is_bbox_in_gt_core.new_zeros((num_bboxes,), + dtype=torch.long) + pixels_in_gt_shadow = assigned_gt_ids.new_empty((0, 2)) + else: + # Step 3: assign a one-hot gt id to each pixel, and smaller objects + # have high priority to assign the pixel. 
+ sort_idx = self.get_gt_priorities(gt_bboxes) + assigned_gt_ids, pixels_in_gt_shadow = \ + self.assign_one_hot_gt_indices(is_bbox_in_gt_core, + is_bbox_in_gt_shadow, + gt_priority=sort_idx) + + if gt_bboxes_ignore is not None and gt_bboxes_ignore.numel() > 0: + # No ground truth or boxes, return empty assignment + gt_bboxes_ignore = scale_boxes( + gt_bboxes_ignore, scale=self.ignore_gt_scale) + is_bbox_in_ignored_gts = is_located_in(bbox_centers, + gt_bboxes_ignore) + is_bbox_in_ignored_gts = is_bbox_in_ignored_gts.any(dim=1) + assigned_gt_ids[is_bbox_in_ignored_gts] = -1 + + # 4. Assign prior bboxes with class label according to its gt id. + assigned_labels = None + shadowed_pixel_labels = None + if gt_labels is not None: + # Default assigned label is the background (-1) + assigned_labels = assigned_gt_ids.new_full((num_bboxes, ), -1) + pos_inds = torch.nonzero( + assigned_gt_ids > 0, as_tuple=False).squeeze() + if pos_inds.numel() > 0: + assigned_labels[pos_inds] = gt_labels[assigned_gt_ids[pos_inds] + - 1] + # 5. Find pixels lying in the shadow of an object + shadowed_pixel_labels = pixels_in_gt_shadow.clone() + if pixels_in_gt_shadow.numel() > 0: + pixel_idx, gt_idx =\ + pixels_in_gt_shadow[:, 0], pixels_in_gt_shadow[:, 1] + assert (assigned_gt_ids[pixel_idx] != gt_idx).all(), \ + 'Some pixels are dually assigned to ignore and gt!' + shadowed_pixel_labels[:, 1] = gt_labels[gt_idx - 1] + override = ( + assigned_labels[pixel_idx] == shadowed_pixel_labels[:, 1]) + if self.foreground_dominate: + # When a pixel is both positive and shadowed, set it as pos + shadowed_pixel_labels = shadowed_pixel_labels[~override] + else: + # When a pixel is both pos and shadowed, set it as shadowed + assigned_labels[pixel_idx[override]] = -1 + assigned_gt_ids[pixel_idx[override]] = 0 + + assign_result = AssignResult( + num_gts, assigned_gt_ids, None, labels=assigned_labels) + # Add shadowed_labels as assign_result property. 
Shape: (num_shadow, 2) + assign_result.set_extra_property('shadowed_labels', + shadowed_pixel_labels) + return assign_result + + def assign_one_hot_gt_indices(self, + is_bbox_in_gt_core, + is_bbox_in_gt_shadow, + gt_priority=None): + """Assign only one gt index to each prior box. + + Gts with large gt_priority are more likely to be assigned. + + Args: + is_bbox_in_gt_core (Tensor): Bool tensor indicating the bbox center + is in the core area of a gt (e.g. 0-0.2). + Shape: (num_prior, num_gt). + is_bbox_in_gt_shadow (Tensor): Bool tensor indicating the bbox + center is in the shadowed area of a gt (e.g. 0.2-0.5). + Shape: (num_prior, num_gt). + gt_priority (Tensor): Priorities of gts. The gt with a higher + priority is more likely to be assigned to the bbox when the bbox + match with multiple gts. Shape: (num_gt, ). + + Returns: + tuple: Returns (assigned_gt_inds, shadowed_gt_inds). + + - assigned_gt_inds: The assigned gt index of each prior bbox \ + (i.e. index from 1 to num_gts). Shape: (num_prior, ). + - shadowed_gt_inds: shadowed gt indices. It is a tensor of \ + shape (num_ignore, 2) with first column being the \ + shadowed prior bbox indices and the second column the \ + shadowed gt indices (1-based). + """ + num_bboxes, num_gts = is_bbox_in_gt_core.shape + + if gt_priority is None: + gt_priority = torch.arange( + num_gts, device=is_bbox_in_gt_core.device) + assert gt_priority.size(0) == num_gts + # The bigger gt_priority, the more preferable to be assigned + # The assigned inds are by default 0 (background) + assigned_gt_inds = is_bbox_in_gt_core.new_zeros((num_bboxes, ), + dtype=torch.long) + # Shadowed bboxes are assigned to be background. But the corresponding + # label is ignored during loss calculation, which is done through + # shadowed_gt_inds + shadowed_gt_inds = torch.nonzero(is_bbox_in_gt_shadow, as_tuple=False) + if is_bbox_in_gt_core.sum() == 0: # No gt match + shadowed_gt_inds[:, 1] += 1 # 1-based. 
For consistency issue + return assigned_gt_inds, shadowed_gt_inds + + # The priority of each prior box and gt pair. If one prior box is + # matched bo multiple gts. Only the pair with the highest priority + # is saved + pair_priority = is_bbox_in_gt_core.new_full((num_bboxes, num_gts), + -1, + dtype=torch.long) + + # Each bbox could match with multiple gts. + # The following codes deal with this situation + # Matched bboxes (to any gt). Shape: (num_pos_anchor, ) + inds_of_match = torch.any(is_bbox_in_gt_core, dim=1) + # The matched gt index of each positive bbox. Length >= num_pos_anchor + # , since one bbox could match multiple gts + matched_bbox_gt_inds = torch.nonzero( + is_bbox_in_gt_core, as_tuple=False)[:, 1] + # Assign priority to each bbox-gt pair. + pair_priority[is_bbox_in_gt_core] = gt_priority[matched_bbox_gt_inds] + _, argmax_priority = pair_priority[inds_of_match].max(dim=1) + assigned_gt_inds[inds_of_match] = argmax_priority + 1 # 1-based + # Zero-out the assigned anchor box to filter the shadowed gt indices + is_bbox_in_gt_core[inds_of_match, argmax_priority] = 0 + # Concat the shadowed indices due to overlapping with that out side of + # effective scale. shape: (total_num_ignore, 2) + shadowed_gt_inds = torch.cat( + (shadowed_gt_inds, torch.nonzero( + is_bbox_in_gt_core, as_tuple=False)), + dim=0) + # `is_bbox_in_gt_core` should be changed back to keep arguments intact. 
+ is_bbox_in_gt_core[inds_of_match, argmax_priority] = 1 + # 1-based shadowed gt indices, to be consistent with `assigned_gt_inds` + if shadowed_gt_inds.numel() > 0: + shadowed_gt_inds[:, 1] += 1 + return assigned_gt_inds, shadowed_gt_inds diff --git a/mmdet/core/bbox/assigners/max_iou_assigner.py b/mmdet/core/bbox/assigners/max_iou_assigner.py index 87f9f725..6e3e54af 100644 --- a/mmdet/core/bbox/assigners/max_iou_assigner.py +++ b/mmdet/core/bbox/assigners/max_iou_assigner.py @@ -1,19 +1,20 @@ import torch -from ..geometry import bbox_overlaps +from ..builder import BBOX_ASSIGNERS +from ..iou_calculators import build_iou_calculator from .assign_result import AssignResult from .base_assigner import BaseAssigner +@BBOX_ASSIGNERS.register_module() class MaxIoUAssigner(BaseAssigner): """Assign a corresponding gt bbox or background to each bbox. - Each proposals will be assigned with `-1`, `0`, or a positive integer + Each proposals will be assigned with `-1`, or a semi-positive integer indicating the ground truth index. - - -1: don't care - - 0: negative sample, no assigned gt - - positive integer: positive sample, index (1-based) of assigned gt + - -1: negative sample, no assigned gt + - semi-positive integer: positive sample, index (0-based) of assigned gt Args: pos_iou_thr (float): IoU threshold for positive bboxes. @@ -28,6 +29,9 @@ class MaxIoUAssigner(BaseAssigner): ignoring any bboxes. ignore_wrt_candidates (bool): Whether to compute the iof between `bboxes` and `gt_bboxes_ignore`, or the contrary. + match_low_quality (bool): Whether to allow low quality matches. This is + usually allowed for RPN and single stage detectors, but not allowed + in the second stage. Details are demonetrated in Step 4. gpu_assign_thr (int): The upper bound of the number of GT for GPU assign. When the number of gt is above this threshold, will assign on CPU device. Negative values mean not assign on CPU. 
@@ -40,7 +44,9 @@ def __init__(self, gt_max_assign_all=True, ignore_iof_thr=-1, ignore_wrt_candidates=True, - gpu_assign_thr=-1): + match_low_quality=True, + gpu_assign_thr=-1, + iou_calculator=dict(type='BboxOverlaps2D')): self.pos_iou_thr = pos_iou_thr self.neg_iou_thr = neg_iou_thr self.min_pos_iou = min_pos_iou @@ -48,17 +54,18 @@ def __init__(self, self.ignore_iof_thr = ignore_iof_thr self.ignore_wrt_candidates = ignore_wrt_candidates self.gpu_assign_thr = gpu_assign_thr + self.match_low_quality = match_low_quality + self.iou_calculator = build_iou_calculator(iou_calculator) def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): """Assign gt to bboxes. This method assign a gt bbox to every bbox (proposal/anchor), each bbox - will be assigned with -1, 0, or a positive number. -1 means don't care, - 0 means negative sample, positive number is the index (1-based) of - assigned gt. + will be assigned with -1, or a semi-positive number. -1 means negative + sample, semi-positive number is the index (0-based) of assigned gt. The assignment is done in following steps, the order matters. - 1. assign every bbox to -1 + 1. assign every bbox to the background 2. assign proposals whose iou with all gts < neg_iou_thr to 0 3. 
for each bbox, if the iou with its nearest gt >= pos_iou_thr, assign it to that bbox @@ -95,17 +102,16 @@ def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): if gt_labels is not None: gt_labels = gt_labels.cpu() - bboxes = bboxes[:, :4] - overlaps = bbox_overlaps(gt_bboxes, bboxes) + overlaps = self.iou_calculator(gt_bboxes, bboxes) if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0): if self.ignore_wrt_candidates: - ignore_overlaps = bbox_overlaps( + ignore_overlaps = self.iou_calculator( bboxes, gt_bboxes_ignore, mode='iof') ignore_max_overlaps, _ = ignore_overlaps.max(dim=1) else: - ignore_overlaps = bbox_overlaps( + ignore_overlaps = self.iou_calculator( gt_bboxes_ignore, bboxes, mode='iof') ignore_max_overlaps, _ = ignore_overlaps.max(dim=0) overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1 @@ -145,8 +151,9 @@ def assign_wrt_overlaps(self, overlaps, gt_labels=None): if gt_labels is None: assigned_labels = None else: - assigned_labels = overlaps.new_zeros((num_bboxes, ), - dtype=torch.long) + assigned_labels = overlaps.new_full((num_bboxes, ), + -1, + dtype=torch.long) return AssignResult( num_gts, assigned_gt_inds, @@ -161,6 +168,7 @@ def assign_wrt_overlaps(self, overlaps, gt_labels=None): gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1) # 2. assign negative: below + # the negative inds are set to be 0 if isinstance(self.neg_iou_thr, float): assigned_gt_inds[(max_overlaps >= 0) & (max_overlaps < self.neg_iou_thr)] = 0 @@ -173,18 +181,27 @@ def assign_wrt_overlaps(self, overlaps, gt_labels=None): pos_inds = max_overlaps >= self.pos_iou_thr assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1 - # 4. 
assign fg: for each gt, proposals with highest IoU - for i in range(num_gts): - if gt_max_overlaps[i] >= self.min_pos_iou: - if self.gt_max_assign_all: - max_iou_inds = overlaps[i, :] == gt_max_overlaps[i] - assigned_gt_inds[max_iou_inds] = i + 1 - else: - assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1 + if self.match_low_quality: + # Low-quality matching will overwirte the assigned_gt_inds assigned + # in Step 3. Thus, the assigned gt might not be the best one for + # prediction. + # For example, if bbox A has 0.9 and 0.8 iou with GT bbox 1 & 2, + # bbox 1 will be assigned as the best target for bbox A in step 3. + # However, if GT bbox 2's gt_argmax_overlaps = A, bbox A's + # assigned_gt_inds will be overwritten to be bbox B. + # This might be the reason that it is not used in ROI Heads. + for i in range(num_gts): + if gt_max_overlaps[i] >= self.min_pos_iou: + if self.gt_max_assign_all: + max_iou_inds = overlaps[i, :] == gt_max_overlaps[i] + assigned_gt_inds[max_iou_inds] = i + 1 + else: + assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1 if gt_labels is not None: - assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, )) - pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze() + assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1) + pos_inds = torch.nonzero( + assigned_gt_inds > 0, as_tuple=False).squeeze() if pos_inds.numel() > 0: assigned_labels[pos_inds] = gt_labels[ assigned_gt_inds[pos_inds] - 1] diff --git a/mmdet/core/bbox/assigners/point_assigner.py b/mmdet/core/bbox/assigners/point_assigner.py index 263b3096..fb8f5e4e 100644 --- a/mmdet/core/bbox/assigners/point_assigner.py +++ b/mmdet/core/bbox/assigners/point_assigner.py @@ -1,9 +1,11 @@ import torch +from ..builder import BBOX_ASSIGNERS from .assign_result import AssignResult from .base_assigner import BaseAssigner +@BBOX_ASSIGNERS.register_module() class PointAssigner(BaseAssigner): """Assign a corresponding gt bbox or background to each point. 
@@ -12,7 +14,6 @@ class PointAssigner(BaseAssigner): - 0: negative sample, no assigned gt - positive integer: positive sample, index (1-based) of assigned gt - """ def __init__(self, scale=4, pos_num=3): @@ -23,12 +24,12 @@ def assign(self, points, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): """Assign gt to points. This method assign a gt bbox to every points set, each points set - will be assigned with 0, or a positive number. - 0 means negative sample, positive number is the index (1-based) of + will be assigned with the background_label (-1), or a label number. + -1 is background, and semi-positive number is the index (0-based) of assigned gt. The assignment is done in following steps, the order matters. - 1. assign every points to 0 + 1. assign every points to the background_label (-1) 2. A point is assigned to some gt bbox if (i) the point is within the k closest points to the gt bbox (ii) the distance between this point and the gt is smaller than @@ -57,8 +58,9 @@ def assign(self, points, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): if gt_labels is None: assigned_labels = None else: - assigned_labels = points.new_zeros((num_points, ), - dtype=torch.long) + assigned_labels = points.new_full((num_points, ), + -1, + dtype=torch.long) return AssignResult( num_gts, assigned_gt_inds, None, labels=assigned_labels) @@ -118,8 +120,9 @@ def assign(self, points, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): less_than_recorded_index] if gt_labels is not None: - assigned_labels = assigned_gt_inds.new_zeros((num_points, )) - pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze() + assigned_labels = assigned_gt_inds.new_full((num_points, ), -1) + pos_inds = torch.nonzero( + assigned_gt_inds > 0, as_tuple=False).squeeze() if pos_inds.numel() > 0: assigned_labels[pos_inds] = gt_labels[ assigned_gt_inds[pos_inds] - 1] diff --git a/mmdet/core/bbox/builder.py b/mmdet/core/bbox/builder.py new file mode 100644 index 00000000..682683b6 --- /dev/null +++ 
b/mmdet/core/bbox/builder.py @@ -0,0 +1,20 @@ +from mmcv.utils import Registry, build_from_cfg + +BBOX_ASSIGNERS = Registry('bbox_assigner') +BBOX_SAMPLERS = Registry('bbox_sampler') +BBOX_CODERS = Registry('bbox_coder') + + +def build_assigner(cfg, **default_args): + """Builder of box assigner.""" + return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args) + + +def build_sampler(cfg, **default_args): + """Builder of box sampler.""" + return build_from_cfg(cfg, BBOX_SAMPLERS, default_args) + + +def build_bbox_coder(cfg, **default_args): + """Builder of box coder.""" + return build_from_cfg(cfg, BBOX_CODERS, default_args) diff --git a/mmdet/core/bbox/coder/__init__.py b/mmdet/core/bbox/coder/__init__.py new file mode 100644 index 00000000..b8ebc369 --- /dev/null +++ b/mmdet/core/bbox/coder/__init__.py @@ -0,0 +1,10 @@ +from .base_bbox_coder import BaseBBoxCoder +from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder +from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder +from .pseudo_bbox_coder import PseudoBBoxCoder +from .tblr_bbox_coder import TBLRBBoxCoder + +__all__ = [ + 'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder', + 'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder' +] diff --git a/mmdet/core/bbox/coder/base_bbox_coder.py b/mmdet/core/bbox/coder/base_bbox_coder.py new file mode 100644 index 00000000..6e427272 --- /dev/null +++ b/mmdet/core/bbox/coder/base_bbox_coder.py @@ -0,0 +1,19 @@ +from abc import ABCMeta, abstractmethod + + +class BaseBBoxCoder(metaclass=ABCMeta): + """Base bounding box coder.""" + + def __init__(self, **kwargs): + pass + + @abstractmethod + def encode(self, bboxes, gt_bboxes): + """Encode deltas between bboxes and ground truth boxes.""" + pass + + @abstractmethod + def decode(self, bboxes, bboxes_pred): + """Decode the predicted bboxes according to prediction and base + boxes.""" + pass diff --git a/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py b/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py new file mode 100644 
index 00000000..9c206c10 --- /dev/null +++ b/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py @@ -0,0 +1,197 @@ +import numpy as np +import torch + +from ..builder import BBOX_CODERS +from .base_bbox_coder import BaseBBoxCoder + + +@BBOX_CODERS.register_module() +class DeltaXYWHBBoxCoder(BaseBBoxCoder): + """Delta XYWH BBox coder. + + Following the practice in `R-CNN `_, + this coder encodes bbox (x1, y1, x2, y2) into delta (dx, dy, dw, dh) and + decodes delta (dx, dy, dw, dh) back to original bbox (x1, y1, x2, y2). + + Args: + target_means (Sequence[float]): Denormalizing means of target for + delta coordinates + target_stds (Sequence[float]): Denormalizing standard deviation of + target for delta coordinates + """ + + def __init__(self, + target_means=(0., 0., 0., 0.), + target_stds=(1., 1., 1., 1.)): + super(BaseBBoxCoder, self).__init__() + self.means = target_means + self.stds = target_stds + + def encode(self, bboxes, gt_bboxes): + """Get box regression transformation deltas that can be used to + transform the ``bboxes`` into the ``gt_bboxes``. + + Args: + bboxes (torch.Tensor): Source boxes, e.g., object proposals. + gt_bboxes (torch.Tensor): Target of the transformation, e.g., + ground-truth boxes. + + Returns: + torch.Tensor: Box transformation deltas + """ + + assert bboxes.size(0) == gt_bboxes.size(0) + assert bboxes.size(-1) == gt_bboxes.size(-1) == 4 + encoded_bboxes = bbox2delta(bboxes, gt_bboxes, self.means, self.stds) + return encoded_bboxes + + def decode(self, + bboxes, + pred_bboxes, + max_shape=None, + wh_ratio_clip=16 / 1000): + """Apply transformation `pred_bboxes` to `boxes`. + + Args: + boxes (torch.Tensor): Basic boxes. + pred_bboxes (torch.Tensor): Encoded boxes with shape + max_shape (tuple[int], optional): Maximum shape of boxes. + Defaults to None. + wh_ratio_clip (float, optional): The allowed ratio between + width and height. + + Returns: + torch.Tensor: Decoded boxes. 
+ """ + + assert pred_bboxes.size(0) == bboxes.size(0) + decoded_bboxes = delta2bbox(bboxes, pred_bboxes, self.means, self.stds, + max_shape, wh_ratio_clip) + + return decoded_bboxes + + +def bbox2delta(proposals, gt, means=(0., 0., 0., 0.), stds=(1., 1., 1., 1.)): + """Compute deltas of proposals w.r.t. gt. + + We usually compute the deltas of x, y, w, h of proposals w.r.t ground + truth bboxes to get regression target. + This is the inverse function of :func:`delta2bbox`. + + Args: + proposals (Tensor): Boxes to be transformed, shape (N, ..., 4) + gt (Tensor): Gt bboxes to be used as base, shape (N, ..., 4) + means (Sequence[float]): Denormalizing means for delta coordinates + stds (Sequence[float]): Denormalizing standard deviation for delta + coordinates + + Returns: + Tensor: deltas with shape (N, 4), where columns represent dx, dy, + dw, dh. + """ + assert proposals.size() == gt.size() + + proposals = proposals.float() + gt = gt.float() + px = (proposals[..., 0] + proposals[..., 2]) * 0.5 + py = (proposals[..., 1] + proposals[..., 3]) * 0.5 + pw = proposals[..., 2] - proposals[..., 0] + ph = proposals[..., 3] - proposals[..., 1] + + gx = (gt[..., 0] + gt[..., 2]) * 0.5 + gy = (gt[..., 1] + gt[..., 3]) * 0.5 + gw = gt[..., 2] - gt[..., 0] + gh = gt[..., 3] - gt[..., 1] + + dx = (gx - px) / pw + dy = (gy - py) / ph + dw = torch.log(gw / pw) + dh = torch.log(gh / ph) + deltas = torch.stack([dx, dy, dw, dh], dim=-1) + + means = deltas.new_tensor(means).unsqueeze(0) + stds = deltas.new_tensor(stds).unsqueeze(0) + deltas = deltas.sub_(means).div_(stds) + + return deltas + + +def delta2bbox(rois, + deltas, + means=(0., 0., 0., 0.), + stds=(1., 1., 1., 1.), + max_shape=None, + wh_ratio_clip=16 / 1000): + """Apply deltas to shift/scale base boxes. + + Typically the rois are anchor or proposed bounding boxes and the deltas are + network outputs used to shift/scale those boxes. + This is the inverse function of :func:`bbox2delta`. 
+ + Args: + rois (Tensor): Boxes to be transformed. Has shape (N, 4) + deltas (Tensor): Encoded offsets with respect to each roi. + Has shape (N, 4 * num_classes). Note N = num_anchors * W * H when + rois is a grid of anchors. Offset encoding follows [1]_. + means (Sequence[float]): Denormalizing means for delta coordinates + stds (Sequence[float]): Denormalizing standard deviation for delta + coordinates + max_shape (tuple[int, int]): Maximum bounds for boxes. specifies (H, W) + wh_ratio_clip (float): Maximum aspect ratio for boxes. + + Returns: + Tensor: Boxes with shape (N, 4), where columns represent + tl_x, tl_y, br_x, br_y. + + References: + .. [1] https://arxiv.org/abs/1311.2524 + + Example: + >>> rois = torch.Tensor([[ 0., 0., 1., 1.], + >>> [ 0., 0., 1., 1.], + >>> [ 0., 0., 1., 1.], + >>> [ 5., 5., 5., 5.]]) + >>> deltas = torch.Tensor([[ 0., 0., 0., 0.], + >>> [ 1., 1., 1., 1.], + >>> [ 0., 0., 2., -1.], + >>> [ 0.7, -1.9, -0.5, 0.3]]) + >>> delta2bbox(rois, deltas, max_shape=(32, 32)) + tensor([[0.0000, 0.0000, 1.0000, 1.0000], + [0.1409, 0.1409, 2.8591, 2.8591], + [0.0000, 0.3161, 4.1945, 0.6839], + [5.0000, 5.0000, 5.0000, 5.0000]]) + """ + means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4) + stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4) + denorm_deltas = deltas * stds + means + dx = denorm_deltas[:, 0::4] + dy = denorm_deltas[:, 1::4] + dw = denorm_deltas[:, 2::4] + dh = denorm_deltas[:, 3::4] + max_ratio = np.abs(np.log(wh_ratio_clip)) + dw = dw.clamp(min=-max_ratio, max=max_ratio) + dh = dh.clamp(min=-max_ratio, max=max_ratio) + # Compute center of each roi + px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx) + py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy) + # Compute width/height of each roi + pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw) + ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh) + # Use exp(network energy) to enlarge/shrink each roi + gw = pw * dw.exp() 
+ gh = ph * dh.exp() + # Use network energy to shift the center of each roi + gx = px + pw * dx + gy = py + ph * dy + # Convert center-xy/width/height to top-left, bottom-right + x1 = gx - gw * 0.5 + y1 = gy - gh * 0.5 + x2 = gx + gw * 0.5 + y2 = gy + gh * 0.5 + if max_shape is not None: + x1 = x1.clamp(min=0, max=max_shape[1]) + y1 = y1.clamp(min=0, max=max_shape[0]) + x2 = x2.clamp(min=0, max=max_shape[1]) + y2 = y2.clamp(min=0, max=max_shape[0]) + bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas) + return bboxes diff --git a/mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py b/mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py new file mode 100644 index 00000000..74e801a1 --- /dev/null +++ b/mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py @@ -0,0 +1,212 @@ +import numpy as np +import torch + +from ..builder import BBOX_CODERS +from .base_bbox_coder import BaseBBoxCoder + + +@BBOX_CODERS.register_module() +class LegacyDeltaXYWHBBoxCoder(BaseBBoxCoder): + """Legacy Delta XYWH BBox coder used in MMDet V1.x. + + Following the practice in R-CNN [1]_, this coder encodes bbox (x1, y1, x2, + y2) into delta (dx, dy, dw, dh) and decodes delta (dx, dy, dw, dh) + back to original bbox (x1, y1, x2, y2). + + Note: + The main difference between :class`LegacyDeltaXYWHBBoxCoder` and + :class:`DeltaXYWHBBoxCoder` is whether ``+ 1`` is used during width and + height calculation. We suggest to only use this coder when testing with + MMDet V1.x models. + + References: + .. 
[1] https://arxiv.org/abs/1311.2524 + + Args: + target_means (Sequence[float]): denormalizing means of target for + delta coordinates + target_stds (Sequence[float]): denormalizing standard deviation of + target for delta coordinates + """ + + def __init__(self, + target_means=(0., 0., 0., 0.), + target_stds=(1., 1., 1., 1.)): + super(BaseBBoxCoder, self).__init__() + self.means = target_means + self.stds = target_stds + + def encode(self, bboxes, gt_bboxes): + """Get box regression transformation deltas that can be used to + transform the ``bboxes`` into the ``gt_bboxes``. + + Args: + bboxes (torch.Tensor): source boxes, e.g., object proposals. + gt_bboxes (torch.Tensor): target of the transformation, e.g., + ground-truth boxes. + + Returns: + torch.Tensor: Box transformation deltas + """ + assert bboxes.size(0) == gt_bboxes.size(0) + assert bboxes.size(-1) == gt_bboxes.size(-1) == 4 + encoded_bboxes = legacy_bbox2delta(bboxes, gt_bboxes, self.means, + self.stds) + return encoded_bboxes + + def decode(self, + bboxes, + pred_bboxes, + max_shape=None, + wh_ratio_clip=16 / 1000): + """Apply transformation `pred_bboxes` to `boxes`. + + Args: + boxes (torch.Tensor): Basic boxes. + pred_bboxes (torch.Tensor): Encoded boxes with shape + max_shape (tuple[int], optional): Maximum shape of boxes. + Defaults to None. + wh_ratio_clip (float, optional): The allowed ratio between + width and height. + + Returns: + torch.Tensor: Decoded boxes. + """ + assert pred_bboxes.size(0) == bboxes.size(0) + decoded_bboxes = legacy_delta2bbox(bboxes, pred_bboxes, self.means, + self.stds, max_shape, wh_ratio_clip) + + return decoded_bboxes + + +def legacy_bbox2delta(proposals, + gt, + means=(0., 0., 0., 0.), + stds=(1., 1., 1., 1.)): + """Compute deltas of proposals w.r.t. gt in the MMDet V1.x manner. + + We usually compute the deltas of x, y, w, h of proposals w.r.t ground + truth bboxes to get regression target. 
+ This is the inverse function of `delta2bbox()` + + Args: + proposals (Tensor): Boxes to be transformed, shape (N, ..., 4) + gt (Tensor): Gt bboxes to be used as base, shape (N, ..., 4) + means (Sequence[float]): Denormalizing means for delta coordinates + stds (Sequence[float]): Denormalizing standard deviation for delta + coordinates + + Returns: + Tensor: deltas with shape (N, 4), where columns represent dx, dy, + dw, dh. + """ + assert proposals.size() == gt.size() + + proposals = proposals.float() + gt = gt.float() + px = (proposals[..., 0] + proposals[..., 2]) * 0.5 + py = (proposals[..., 1] + proposals[..., 3]) * 0.5 + pw = proposals[..., 2] - proposals[..., 0] + 1.0 + ph = proposals[..., 3] - proposals[..., 1] + 1.0 + + gx = (gt[..., 0] + gt[..., 2]) * 0.5 + gy = (gt[..., 1] + gt[..., 3]) * 0.5 + gw = gt[..., 2] - gt[..., 0] + 1.0 + gh = gt[..., 3] - gt[..., 1] + 1.0 + + dx = (gx - px) / pw + dy = (gy - py) / ph + dw = torch.log(gw / pw) + dh = torch.log(gh / ph) + deltas = torch.stack([dx, dy, dw, dh], dim=-1) + + means = deltas.new_tensor(means).unsqueeze(0) + stds = deltas.new_tensor(stds).unsqueeze(0) + deltas = deltas.sub_(means).div_(stds) + + return deltas + + +def legacy_delta2bbox(rois, + deltas, + means=(0., 0., 0., 0.), + stds=(1., 1., 1., 1.), + max_shape=None, + wh_ratio_clip=16 / 1000): + """Apply deltas to shift/scale base boxes in the MMDet V1.x manner. + + Typically the rois are anchor or proposed bounding boxes and the deltas are + network outputs used to shift/scale those boxes. + This is the inverse function of `bbox2delta()` + + Args: + rois (Tensor): Boxes to be transformed. Has shape (N, 4) + deltas (Tensor): Encoded offsets with respect to each roi. + Has shape (N, 4 * num_classes). Note N = num_anchors * W * H when + rois is a grid of anchors. Offset encoding follows [1]_. 
+ means (Sequence[float]): Denormalizing means for delta coordinates + stds (Sequence[float]): Denormalizing standard deviation for delta + coordinates + max_shape (tuple[int, int]): Maximum bounds for boxes. specifies (H, W) + wh_ratio_clip (float): Maximum aspect ratio for boxes. + + Returns: + Tensor: Boxes with shape (N, 4), where columns represent + tl_x, tl_y, br_x, br_y. + + References: + .. [1] https://arxiv.org/abs/1311.2524 + + Example: + >>> rois = torch.Tensor([[ 0., 0., 1., 1.], + >>> [ 0., 0., 1., 1.], + >>> [ 0., 0., 1., 1.], + >>> [ 5., 5., 5., 5.]]) + >>> deltas = torch.Tensor([[ 0., 0., 0., 0.], + >>> [ 1., 1., 1., 1.], + >>> [ 0., 0., 2., -1.], + >>> [ 0.7, -1.9, -0.5, 0.3]]) + >>> legacy_delta2bbox(rois, deltas, max_shape=(32, 32)) + tensor([[0.0000, 0.0000, 1.5000, 1.5000], + [0.0000, 0.0000, 5.2183, 5.2183], + [0.0000, 0.1321, 7.8891, 0.8679], + [5.3967, 2.4251, 6.0033, 3.7749]]) + """ + means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4) + stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4) + denorm_deltas = deltas * stds + means + dx = denorm_deltas[:, 0::4] + dy = denorm_deltas[:, 1::4] + dw = denorm_deltas[:, 2::4] + dh = denorm_deltas[:, 3::4] + max_ratio = np.abs(np.log(wh_ratio_clip)) + dw = dw.clamp(min=-max_ratio, max=max_ratio) + dh = dh.clamp(min=-max_ratio, max=max_ratio) + # Compute center of each roi + px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx) + py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy) + # Compute width/height of each roi + pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw) + ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh) + # Use exp(network energy) to enlarge/shrink each roi + gw = pw * dw.exp() + gh = ph * dh.exp() + # Use network energy to shift the center of each roi + gx = px + pw * dx + gy = py + ph * dy + # Convert center-xy/width/height to top-left, bottom-right + + # The true legacy box coder should +- 0.5 here. 
+ # However, current implementation improves the performance when testing + # the models trained in MMDetection 1.X (~0.5 bbox AP, 0.2 mask AP) + x1 = gx - gw * 0.5 + y1 = gy - gh * 0.5 + x2 = gx + gw * 0.5 + y2 = gy + gh * 0.5 + if max_shape is not None: + x1 = x1.clamp(min=0, max=max_shape[1] - 1) + y1 = y1.clamp(min=0, max=max_shape[0] - 1) + x2 = x2.clamp(min=0, max=max_shape[1] - 1) + y2 = y2.clamp(min=0, max=max_shape[0] - 1) + bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas) + return bboxes diff --git a/mmdet/core/bbox/coder/pseudo_bbox_coder.py b/mmdet/core/bbox/coder/pseudo_bbox_coder.py new file mode 100644 index 00000000..1c8346f4 --- /dev/null +++ b/mmdet/core/bbox/coder/pseudo_bbox_coder.py @@ -0,0 +1,18 @@ +from ..builder import BBOX_CODERS +from .base_bbox_coder import BaseBBoxCoder + + +@BBOX_CODERS.register_module() +class PseudoBBoxCoder(BaseBBoxCoder): + """Pseudo bounding box coder.""" + + def __init__(self, **kwargs): + super(BaseBBoxCoder, self).__init__(**kwargs) + + def encode(self, bboxes, gt_bboxes): + """torch.Tensor: return the given ``bboxes``""" + return gt_bboxes + + def decode(self, bboxes, pred_bboxes): + """torch.Tensor: return the given ``pred_bboxes``""" + return pred_bboxes diff --git a/mmdet/core/bbox/coder/tblr_bbox_coder.py b/mmdet/core/bbox/coder/tblr_bbox_coder.py new file mode 100644 index 00000000..f586a419 --- /dev/null +++ b/mmdet/core/bbox/coder/tblr_bbox_coder.py @@ -0,0 +1,165 @@ +import torch + +from ..builder import BBOX_CODERS +from .base_bbox_coder import BaseBBoxCoder + + +@BBOX_CODERS.register_module() +class TBLRBBoxCoder(BaseBBoxCoder): + """TBLR BBox coder. + + Following the practice in `FSAF `_, + this coder encodes gt bboxes (x1, y1, x2, y2) into (top, bottom, left, + right) and decode it back to the original. + + Args: + normalizer (list | float): Normalization factor to be + divided with when coding the coordinates. 
If it is a list, it should + have length of 4 indicating normalization factor in tblr dims. + Otherwise it is a unified float factor for all dims. Default: 4.0 + """ + + def __init__(self, normalizer=4.0): + super(BaseBBoxCoder, self).__init__() + self.normalizer = normalizer + + def encode(self, bboxes, gt_bboxes): + """Get box regression transformation deltas that can be used to + transform the ``bboxes`` into the ``gt_bboxes`` in the (top, left, + bottom, right) order. + + Args: + bboxes (torch.Tensor): source boxes, e.g., object proposals. + gt_bboxes (torch.Tensor): target of the transformation, e.g., + ground truth boxes. + + Returns: + torch.Tensor: Box transformation deltas + """ + assert bboxes.size(0) == gt_bboxes.size(0) + assert bboxes.size(-1) == gt_bboxes.size(-1) == 4 + encoded_bboxes = bboxes2tblr( + bboxes, gt_bboxes, normalizer=self.normalizer) + return encoded_bboxes + + def decode(self, bboxes, pred_bboxes, max_shape=None): + """Apply transformation `pred_bboxes` to `boxes`. + + Args: + boxes (torch.Tensor): Basic boxes. + pred_bboxes (torch.Tensor): Encoded boxes with shape + max_shape (tuple[int], optional): Maximum shape of boxes. + Defaults to None. + + Returns: + torch.Tensor: Decoded boxes. + """ + assert pred_bboxes.size(0) == bboxes.size(0) + decoded_bboxes = tblr2bboxes( + bboxes, + pred_bboxes, + normalizer=self.normalizer, + max_shape=max_shape) + + return decoded_bboxes + + +def bboxes2tblr(priors, gts, normalizer=4.0, normalize_by_wh=True): + """Encode ground truth boxes to tblr coordinate. + + It first convert the gt coordinate to tblr format, + (top, bottom, left, right), relative to prior box centers. + The tblr coordinate may be normalized by the side length of prior bboxes + if `normalize_by_wh` is specified as True, and it is then normalized by + the `normalizer` factor. + + Args: + priors (Tensor): Prior boxes in point form + Shape: (num_proposals,4). 
+ gts (Tensor): Coords of ground truth for each prior in point-form + Shape: (num_proposals, 4). + normalizer (Sequence[float] | float): normalization parameter of + encoded boxes. If it is a list, it has to have length = 4. + Default: 4.0 + normalize_by_wh (bool): Whether to normalize tblr coordinate by the + side length (wh) of prior bboxes. + + Return: + encoded boxes (Tensor), Shape: (num_proposals, 4) + """ + + # dist b/t match center and prior's center + if not isinstance(normalizer, float): + normalizer = torch.tensor(normalizer, device=priors.device) + assert len(normalizer) == 4, 'Normalizer must have length = 4' + assert priors.size(0) == gts.size(0) + prior_centers = (priors[:, 0:2] + priors[:, 2:4]) / 2 + xmin, ymin, xmax, ymax = gts.split(1, dim=1) + top = prior_centers[:, 1].unsqueeze(1) - ymin + bottom = ymax - prior_centers[:, 1].unsqueeze(1) + left = prior_centers[:, 0].unsqueeze(1) - xmin + right = xmax - prior_centers[:, 0].unsqueeze(1) + loc = torch.cat((top, bottom, left, right), dim=1) + if normalize_by_wh: + # Normalize tblr by anchor width and height + wh = priors[:, 2:4] - priors[:, 0:2] + w, h = torch.split(wh, 1, dim=1) + loc[:, :2] /= h # tb is normalized by h + loc[:, 2:] /= w # lr is normalized by w + # Normalize tblr by the given normalization factor + return loc / normalizer + + +def tblr2bboxes(priors, + tblr, + normalizer=4.0, + normalize_by_wh=True, + max_shape=None): + """Decode tblr outputs to prediction boxes. + + The process includes 3 steps: 1) De-normalize tblr coordinates by + multiplying it with `normalizer`; 2) De-normalize tblr coordinates by the + prior bbox width and height if `normalize_by_wh` is `True`; 3) Convert + tblr (top, bottom, left, right) pair relative to the center of priors back + to (xmin, ymin, xmax, ymax) coordinate. + + Args: + priors (Tensor): Prior boxes in point form (x0, y0, x1, y1) + Shape: (n,4). + tblr (Tensor): Coords of network output in tblr form + Shape: (n, 4). 
+ normalizer (Sequence[float] | float): Normalization parameter of + encoded boxes. By list, it represents the normalization factors at + tblr dims. By float, it is the unified normalization factor at all + dims. Default: 4.0 + normalize_by_wh (bool): Whether the tblr coordinates have been + normalized by the side length (wh) of prior bboxes. + max_shape (tuple, optional): Shape of the image. Decoded bboxes + exceeding which will be clamped. + + Return: + encoded boxes (Tensor), Shape: (n, 4) + """ + if not isinstance(normalizer, float): + normalizer = torch.tensor(normalizer, device=priors.device) + assert len(normalizer) == 4, 'Normalizer must have length = 4' + assert priors.size(0) == tblr.size(0) + loc_decode = tblr * normalizer + prior_centers = (priors[:, 0:2] + priors[:, 2:4]) / 2 + if normalize_by_wh: + wh = priors[:, 2:4] - priors[:, 0:2] + w, h = torch.split(wh, 1, dim=1) + loc_decode[:, :2] *= h # tb + loc_decode[:, 2:] *= w # lr + top, bottom, left, right = loc_decode.split(1, dim=1) + xmin = prior_centers[:, 0].unsqueeze(1) - left + xmax = prior_centers[:, 0].unsqueeze(1) + right + ymin = prior_centers[:, 1].unsqueeze(1) - top + ymax = prior_centers[:, 1].unsqueeze(1) + bottom + boxes = torch.cat((xmin, ymin, xmax, ymax), dim=1) + if max_shape is not None: + boxes[:, 0].clamp_(min=0, max=max_shape[1]) + boxes[:, 1].clamp_(min=0, max=max_shape[0]) + boxes[:, 2].clamp_(min=0, max=max_shape[1]) + boxes[:, 3].clamp_(min=0, max=max_shape[0]) + return boxes diff --git a/mmdet/core/bbox/demodata.py b/mmdet/core/bbox/demodata.py index d59d6542..94308585 100644 --- a/mmdet/core/bbox/demodata.py +++ b/mmdet/core/bbox/demodata.py @@ -3,8 +3,7 @@ def ensure_rng(rng=None): - """ - Simple version of the ``kwarray.ensure_rng`` + """Simple version of the ``kwarray.ensure_rng`` Args: rng (int | numpy.random.RandomState | None): @@ -28,8 +27,7 @@ def ensure_rng(rng=None): def random_boxes(num=1, scale=1, rng=None): - """ - Simple version of ``kwimage.Boxes.random`` + 
"""Simple version of ``kwimage.Boxes.random`` Returns: Tensor: shape (n, 4) in x1, y1, x2, y2 format. diff --git a/mmdet/core/bbox/iou_calculators/__init__.py b/mmdet/core/bbox/iou_calculators/__init__.py new file mode 100644 index 00000000..e71369a5 --- /dev/null +++ b/mmdet/core/bbox/iou_calculators/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_iou_calculator +from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps + +__all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps'] diff --git a/mmdet/core/bbox/iou_calculators/builder.py b/mmdet/core/bbox/iou_calculators/builder.py new file mode 100644 index 00000000..09094d7e --- /dev/null +++ b/mmdet/core/bbox/iou_calculators/builder.py @@ -0,0 +1,8 @@ +from mmcv.utils import Registry, build_from_cfg + +IOU_CALCULATORS = Registry('IoU calculator') + + +def build_iou_calculator(cfg, default_args=None): + """Builder of IoU calculator.""" + return build_from_cfg(cfg, IOU_CALCULATORS, default_args) diff --git a/mmdet/core/bbox/iou_calculators/iou2d_calculator.py b/mmdet/core/bbox/iou_calculators/iou2d_calculator.py new file mode 100644 index 00000000..c4a05cc4 --- /dev/null +++ b/mmdet/core/bbox/iou_calculators/iou2d_calculator.py @@ -0,0 +1,130 @@ +import torch + +from .builder import IOU_CALCULATORS + + +@IOU_CALCULATORS.register_module() +class BboxOverlaps2D(object): + """2D IoU Calculator.""" + + def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): + """Calculate IoU between 2D bboxes. + + Args: + bboxes1 (Tensor): bboxes have shape (m, 4) in + format, or shape (m, 5) in format. + bboxes2 (Tensor): bboxes have shape (m, 4) in + format, shape (m, 5) in format, or be + empty. If is_aligned is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union) or iof (intersection + over foreground). 
+ + Returns: + ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) + """ + assert bboxes1.size(-1) in [0, 4, 5] + assert bboxes2.size(-1) in [0, 4, 5] + if bboxes2.size(-1) == 5: + bboxes2 = bboxes2[..., :4] + if bboxes1.size(-1) == 5: + bboxes1 = bboxes1[..., :4] + return bbox_overlaps(bboxes1, bboxes2, mode, is_aligned) + + def __repr__(self): + """str: a string describing the module""" + repr_str = self.__class__.__name__ + '()' + return repr_str + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6): + """Calculate overlap between two set of bboxes. + + If ``is_aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Args: + bboxes1 (Tensor): shape (m, 4) in format or empty. + bboxes2 (Tensor): shape (n, 4) in format or empty. + If is_aligned is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). 
+ + Returns: + ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) + + Example: + >>> bboxes1 = torch.FloatTensor([ + >>> [0, 0, 10, 10], + >>> [10, 10, 20, 20], + >>> [32, 32, 38, 42], + >>> ]) + >>> bboxes2 = torch.FloatTensor([ + >>> [0, 0, 10, 20], + >>> [0, 10, 10, 19], + >>> [10, 10, 20, 20], + >>> ]) + >>> bbox_overlaps(bboxes1, bboxes2) + tensor([[0.5000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.0000], + [0.0000, 0.0000, 0.0000]]) + + Example: + >>> empty = torch.FloatTensor([]) + >>> nonempty = torch.FloatTensor([ + >>> [0, 0, 10, 9], + >>> ]) + >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) + >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) + >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) + """ + + assert mode in ['iou', 'iof'] + # Either the boxes are empty or the length of boxes' last dimension is 4 + assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) + assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) + + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if is_aligned: + assert rows == cols + + if rows * cols == 0: + return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) + + if is_aligned: + lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] + rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] + + wh = (rb - lt).clamp(min=0) # [rows, 2] + overlap = wh[:, 0] * wh[:, 1] + area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * ( + bboxes1[:, 3] - bboxes1[:, 1]) + + if mode == 'iou': + area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * ( + bboxes2[:, 3] - bboxes2[:, 1]) + union = area1 + area2 - overlap + else: + union = area1 + else: + lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] + rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] + + wh = (rb - lt).clamp(min=0) # [rows, cols, 2] + overlap = wh[:, :, 0] * wh[:, :, 1] + area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * ( + bboxes1[:, 3] - bboxes1[:, 1]) + + if mode == 'iou': + area2 = 
(bboxes2[:, 2] - bboxes2[:, 0]) * ( + bboxes2[:, 3] - bboxes2[:, 1]) + union = area1[:, None] + area2 - overlap + else: + union = area1[:, None] + + eps = union.new_tensor([eps]) + union = torch.max(union, eps) + ious = overlap / union + + return ious diff --git a/mmdet/core/bbox/samplers/__init__.py b/mmdet/core/bbox/samplers/__init__.py index d709d8ec..0b06303f 100644 --- a/mmdet/core/bbox/samplers/__init__.py +++ b/mmdet/core/bbox/samplers/__init__.py @@ -6,9 +6,10 @@ from .pseudo_sampler import PseudoSampler from .random_sampler import RandomSampler from .sampling_result import SamplingResult +from .score_hlr_sampler import ScoreHLRSampler __all__ = [ 'BaseSampler', 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', - 'OHEMSampler', 'SamplingResult' + 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler' ] diff --git a/mmdet/core/bbox/samplers/base_sampler.py b/mmdet/core/bbox/samplers/base_sampler.py index f437195f..9ea35def 100644 --- a/mmdet/core/bbox/samplers/base_sampler.py +++ b/mmdet/core/bbox/samplers/base_sampler.py @@ -6,6 +6,7 @@ class BaseSampler(metaclass=ABCMeta): + """Base class of samplers.""" def __init__(self, num, @@ -22,10 +23,12 @@ def __init__(self, @abstractmethod def _sample_pos(self, assign_result, num_expected, **kwargs): + """Sample positive samples.""" pass @abstractmethod def _sample_neg(self, assign_result, num_expected, **kwargs): + """Sample negative samples.""" pass def sample(self, diff --git a/mmdet/core/bbox/samplers/combined_sampler.py b/mmdet/core/bbox/samplers/combined_sampler.py index 351a097f..564729f0 100644 --- a/mmdet/core/bbox/samplers/combined_sampler.py +++ b/mmdet/core/bbox/samplers/combined_sampler.py @@ -1,8 +1,10 @@ -from ..assign_sampling import build_sampler +from ..builder import BBOX_SAMPLERS, build_sampler from .base_sampler import BaseSampler +@BBOX_SAMPLERS.register_module() class CombinedSampler(BaseSampler): + """A sampler that combines positive 
sampler and negative sampler.""" def __init__(self, pos_sampler, neg_sampler, **kwargs): super(CombinedSampler, self).__init__(**kwargs) @@ -10,7 +12,9 @@ def __init__(self, pos_sampler, neg_sampler, **kwargs): self.neg_sampler = build_sampler(neg_sampler, **kwargs) def _sample_pos(self, **kwargs): + """Sample positive samples.""" raise NotImplementedError def _sample_neg(self, **kwargs): + """Sample negative samples.""" raise NotImplementedError diff --git a/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py b/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py index bc829a23..c7352984 100644 --- a/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py +++ b/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py @@ -1,13 +1,26 @@ import numpy as np import torch +from ..builder import BBOX_SAMPLERS from .random_sampler import RandomSampler +@BBOX_SAMPLERS.register_module() class InstanceBalancedPosSampler(RandomSampler): + """Instance balanced sampler that samples equal number of positive samples + for each instance.""" def _sample_pos(self, assign_result, num_expected, **kwargs): - pos_inds = torch.nonzero(assign_result.gt_inds > 0) + """Sample positive boxes. + + Args: + assign_result (:obj:`AssignResult`): The assigned results of boxes. + num_expected (int): The number of expected positive samples + + Returns: + Tensor or ndarray: sampled indices. 
+ """ + pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) if pos_inds.numel() != 0: pos_inds = pos_inds.squeeze(1) if pos_inds.numel() <= num_expected: @@ -18,7 +31,8 @@ def _sample_pos(self, assign_result, num_expected, **kwargs): num_per_gt = int(round(num_expected / float(num_gts)) + 1) sampled_inds = [] for i in unique_gt_inds: - inds = torch.nonzero(assign_result.gt_inds == i.item()) + inds = torch.nonzero( + assign_result.gt_inds == i.item(), as_tuple=False) if inds.numel() != 0: inds = inds.squeeze(1) else: diff --git a/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py b/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py index d9239e07..f275e430 100644 --- a/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py +++ b/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py @@ -1,11 +1,13 @@ import numpy as np import torch +from ..builder import BBOX_SAMPLERS from .random_sampler import RandomSampler +@BBOX_SAMPLERS.register_module() class IoUBalancedNegSampler(RandomSampler): - """IoU Balanced Sampling + """IoU Balanced Sampling. arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) @@ -42,6 +44,17 @@ def __init__(self, self.num_bins = num_bins def sample_via_interval(self, max_overlaps, full_set, num_expected): + """Sample according to the iou interval. + + Args: + max_overlaps (torch.Tensor): IoU between bounding boxes and ground + truth boxes. + full_set (set(int)): A full set of indices of boxes。 + num_expected (int): Number of expected samples。 + + Returns: + np.ndarray: Indices of samples + """ max_iou = max_overlaps.max() iou_interval = (max_iou - self.floor_thr) / self.num_bins per_num_expected = int(num_expected / self.num_bins) @@ -73,7 +86,16 @@ def sample_via_interval(self, max_overlaps, full_set, num_expected): return sampled_inds def _sample_neg(self, assign_result, num_expected, **kwargs): - neg_inds = torch.nonzero(assign_result.gt_inds == 0) + """Sample negative boxes. 
+ + Args: + assign_result (:obj:`AssignResult`): The assigned results of boxes. + num_expected (int): The number of expected negative samples + + Returns: + Tensor or ndarray: sampled indices. + """ + neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) if neg_inds.numel() != 0: neg_inds = neg_inds.squeeze(1) if len(neg_inds) <= num_expected: diff --git a/mmdet/core/bbox/samplers/ohem_sampler.py b/mmdet/core/bbox/samplers/ohem_sampler.py index 3701d83a..242855be 100644 --- a/mmdet/core/bbox/samplers/ohem_sampler.py +++ b/mmdet/core/bbox/samplers/ohem_sampler.py @@ -1,15 +1,15 @@ import torch +from ..builder import BBOX_SAMPLERS from ..transforms import bbox2roi from .base_sampler import BaseSampler +@BBOX_SAMPLERS.register_module() class OHEMSampler(BaseSampler): - """ - Online Hard Example Mining Sampler described in [1]_. - - References: - .. [1] https://arxiv.org/pdf/1604.03540.pdf + r"""Online Hard Example Mining Sampler described in `Training Region-based + Object Detectors with Online Hard Example Mining + `_. """ def __init__(self, @@ -38,6 +38,7 @@ def hard_mining(self, inds, num_expected, bboxes, labels, feats): loss = self.bbox_head.loss( cls_score=cls_score, bbox_pred=None, + rois=rois, labels=labels, label_weights=cls_score.new_ones(cls_score.size(0)), bbox_targets=None, @@ -52,8 +53,20 @@ def _sample_pos(self, bboxes=None, feats=None, **kwargs): + """Sample positive boxes. + + Args: + assign_result (:obj:`AssignResult`): Assigned results + num_expected (int): Number of expected positive samples + bboxes (torch.Tensor, optional): Boxes. Defaults to None. + feats (list[torch.Tensor], optional): Multi-level features. + Defaults to None. 
+ + Returns: + torch.Tensor: Indices of positive samples + """ # Sample some hard positive samples - pos_inds = torch.nonzero(assign_result.gt_inds > 0) + pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) if pos_inds.numel() != 0: pos_inds = pos_inds.squeeze(1) if pos_inds.numel() <= num_expected: @@ -68,12 +81,26 @@ def _sample_neg(self, bboxes=None, feats=None, **kwargs): + """Sample negative boxes. + + Args: + assign_result (:obj:`AssignResult`): Assigned results + num_expected (int): Number of expected negative samples + bboxes (torch.Tensor, optional): Boxes. Defaults to None. + feats (list[torch.Tensor], optional): Multi-level features. + Defaults to None. + + Returns: + torch.Tensor: Indices of negative samples + """ # Sample some hard negative samples - neg_inds = torch.nonzero(assign_result.gt_inds == 0) + neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) if neg_inds.numel() != 0: neg_inds = neg_inds.squeeze(1) if len(neg_inds) <= num_expected: return neg_inds else: + neg_labels = assign_result.labels.new_empty( + neg_inds.size(0)).fill_(self.bbox_head.num_classes) return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], - assign_result.labels[neg_inds], feats) + neg_labels, feats) diff --git a/mmdet/core/bbox/samplers/pseudo_sampler.py b/mmdet/core/bbox/samplers/pseudo_sampler.py index b4c2ea09..2bd81abc 100644 --- a/mmdet/core/bbox/samplers/pseudo_sampler.py +++ b/mmdet/core/bbox/samplers/pseudo_sampler.py @@ -1,25 +1,40 @@ import torch +from ..builder import BBOX_SAMPLERS from .base_sampler import BaseSampler from .sampling_result import SamplingResult +@BBOX_SAMPLERS.register_module() class PseudoSampler(BaseSampler): + """A pseudo sampler that does not do sampling actually.""" def __init__(self, **kwargs): pass def _sample_pos(self, **kwargs): + """Sample positive samples.""" raise NotImplementedError def _sample_neg(self, **kwargs): + """Sample negative samples.""" raise NotImplementedError def 
sample(self, assign_result, bboxes, gt_bboxes, **kwargs): + """Directly returns the positive and negative indices of samples. + + Args: + assign_result (:obj:`AssignResult`): Assigned results + bboxes (torch.Tensor): Bounding boxes + gt_bboxes (torch.Tensor): Ground truth boxes + + Returns: + :obj:`SamplingResult`: sampler results + """ + pos_inds = torch.nonzero( - assign_result.gt_inds > 0).squeeze(-1).unique() + assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() + neg_inds = torch.nonzero( - assign_result.gt_inds == 0).squeeze(-1).unique() + assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() + gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) + sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, + assign_result, gt_flags) diff --git a/mmdet/core/bbox/samplers/random_sampler.py b/mmdet/core/bbox/samplers/random_sampler.py index 261ca9c6..558babdb 100644 --- a/mmdet/core/bbox/samplers/random_sampler.py +++ b/mmdet/core/bbox/samplers/random_sampler.py @@ -1,9 +1,21 @@ import torch +from ..builder import BBOX_SAMPLERS from .base_sampler import BaseSampler +@BBOX_SAMPLERS.register_module() class RandomSampler(BaseSampler): + """Random sampler. + + Args: + num (int): Number of samples + pos_fraction (float): Fraction of positive samples + neg_pos_ub (int, optional): Upper bound number of negative and + positive samples. Defaults to -1. + add_gt_as_proposals (bool, optional): Whether to add ground truth + boxes as proposals. Defaults to True. 
+ """ def __init__(self, num, @@ -44,7 +56,7 @@ def random_choice(self, gallery, num): def _sample_pos(self, assign_result, num_expected, **kwargs): """Randomly sample some positive samples.""" - pos_inds = torch.nonzero(assign_result.gt_inds > 0) + pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) if pos_inds.numel() != 0: pos_inds = pos_inds.squeeze(1) if pos_inds.numel() <= num_expected: @@ -54,7 +66,7 @@ def _sample_pos(self, assign_result, num_expected, **kwargs): def _sample_neg(self, assign_result, num_expected, **kwargs): """Randomly sample some negative samples.""" - neg_inds = torch.nonzero(assign_result.gt_inds == 0) + neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) if neg_inds.numel() != 0: neg_inds = neg_inds.squeeze(1) if len(neg_inds) <= num_expected: diff --git a/mmdet/core/bbox/samplers/sampling_result.py b/mmdet/core/bbox/samplers/sampling_result.py index dcf25eec..419a8e39 100644 --- a/mmdet/core/bbox/samplers/sampling_result.py +++ b/mmdet/core/bbox/samplers/sampling_result.py @@ -4,12 +4,13 @@ class SamplingResult(util_mixins.NiceRepr): - """ + """Bbox sampling result. 
+ Example: >>> # xdoctest: +IGNORE_WANT >>> from mmdet.core.bbox.samplers.sampling_result import * # NOQA >>> self = SamplingResult.random(rng=10) - >>> print('self = {}'.format(self)) + >>> print(f'self = {self}') self = >> self = SamplingResult.random() - >>> print('self = {}'.format(self.to(None))) + >>> print(f'self = {self.to(None)}') >>> # xdoctest: +REQUIRES(--gpu) - >>> print('self = {}'.format(self.to(0))) + >>> print(f'self = {self.to(0)}') """ _dict = self.__dict__ for key, value in _dict.items(): @@ -71,15 +72,13 @@ def __nice__(self): data = self.info.copy() data['pos_bboxes'] = data.pop('pos_bboxes').shape data['neg_bboxes'] = data.pop('neg_bboxes').shape - parts = ['\'{}\': {!r}'.format(k, v) for k, v in sorted(data.items())] + parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())] body = ' ' + ',\n '.join(parts) return '{\n' + body + '\n}' @property def info(self): - """ - Returns a dictionary of info about the object - """ + """Returns a dictionary of info about the object.""" return { 'pos_inds': self.pos_inds, 'neg_inds': self.neg_inds, @@ -94,19 +93,18 @@ def info(self): def random(cls, rng=None, **kwargs): """ Args: - rng (None | int | numpy.random.RandomState): seed or state - - Kwargs: - num_preds: number of predicted boxes - num_gts: number of true boxes - p_ignore (float): probability of a predicted box assinged to an - ignored truth - p_assigned (float): probability of a predicted box not being - assigned - p_use_label (float | bool): with labels or not + rng (None | int | numpy.random.RandomState): seed or state. + kwargs (keyword arguments): + - num_preds: number of predicted boxes + - num_gts: number of true boxes + - p_ignore (float): probability of a predicted box assigned to \ + an ignored truth. + - p_assigned (float): probability of a predicted box not being \ + assigned. + - p_use_label (float | bool): with labels or not. Returns: - AssignResult : + :obj:`SamplingResult`: Randomly generated sampling result. 
Example: >>> from mmdet.core.bbox.samplers.sampling_result import * # NOQA @@ -147,7 +145,7 @@ def random(cls, rng=None, **kwargs): sampler = RandomSampler( num, pos_fraction, - neg_pos_ubo=neg_pos_ub, + neg_pos_ub=neg_pos_ub, add_gt_as_proposals=add_gt_as_proposals, rng=rng) self = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels) diff --git a/mmdet/core/bbox/samplers/score_hlr_sampler.py b/mmdet/core/bbox/samplers/score_hlr_sampler.py new file mode 100644 index 00000000..3089451a --- /dev/null +++ b/mmdet/core/bbox/samplers/score_hlr_sampler.py @@ -0,0 +1,261 @@ +import torch +from mmcv.ops import nms_match + +from ..builder import BBOX_SAMPLERS +from ..transforms import bbox2roi +from .base_sampler import BaseSampler +from .sampling_result import SamplingResult + + +@BBOX_SAMPLERS.register_module() +class ScoreHLRSampler(BaseSampler): + r"""Importance-based Sample Reweighting (ISR_N), described in `Prime Sample + Attention in Object Detection `_. + + Score hierarchical local rank (HLR) differentiates with RandomSampler in + negative part. It firstly computes Score-HLR in a two-step way, + then linearly maps score hlr to the loss weights. + + Args: + num (int): Total number of sampled RoIs. + pos_fraction (float): Fraction of positive samples. + context (:class:`BaseRoIHead`): RoI head that the sampler belongs to. + neg_pos_ub (int): Upper bound of the ratio of num negative to num + positive, -1 means no upper bound. + add_gt_as_proposals (bool): Whether to add ground truth as proposals. + k (float): Power of the non-linear mapping. + bias (float): Shift of the non-linear mapping. + score_thr (float): Minimum score that a negative sample is to be + considered as valid bbox. 
+ """ + + def __init__(self, + num, + pos_fraction, + context, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0, + score_thr=0.05, + iou_thr=0.5, + **kwargs): + super().__init__(num, pos_fraction, neg_pos_ub, add_gt_as_proposals) + self.k = k + self.bias = bias + self.score_thr = score_thr + self.iou_thr = iou_thr + self.context = context + # context of cascade detectors is a list, so distinguish them here. + if not hasattr(context, 'num_stages'): + self.bbox_roi_extractor = context.bbox_roi_extractor + self.bbox_head = context.bbox_head + self.with_shared_head = context.with_shared_head + if self.with_shared_head: + self.shared_head = context.shared_head + else: + self.bbox_roi_extractor = context.bbox_roi_extractor[ + context.current_stage] + self.bbox_head = context.bbox_head[context.current_stage] + + @staticmethod + def random_choice(gallery, num): + """Randomly select some elements from the gallery. + + If `gallery` is a Tensor, the returned indices will be a Tensor; + If `gallery` is a ndarray or list, the returned indices will be a + ndarray. + + Args: + gallery (Tensor | ndarray | list): indices pool. + num (int): expected sample num. + + Returns: + Tensor or ndarray: sampled indices. 
+ """ + assert len(gallery) >= num + + is_tensor = isinstance(gallery, torch.Tensor) + if not is_tensor: + gallery = torch.tensor( + gallery, dtype=torch.long, device=torch.cuda.current_device()) + perm = torch.randperm(gallery.numel(), device=gallery.device)[:num] + rand_inds = gallery[perm] + if not is_tensor: + rand_inds = rand_inds.cpu().numpy() + return rand_inds + + def _sample_pos(self, assign_result, num_expected, **kwargs): + """Randomly sample some positive samples.""" + pos_inds = torch.nonzero(assign_result.gt_inds > 0).flatten() + if pos_inds.numel() <= num_expected: + return pos_inds + else: + return self.random_choice(pos_inds, num_expected) + + def _sample_neg(self, + assign_result, + num_expected, + bboxes, + feats=None, + img_meta=None, + **kwargs): + """Sample negative samples. + + Score-HLR sampler is done in the following steps: + 1. Take the maximum positive score prediction of each negative samples + as s_i. + 2. Filter out negative samples whose s_i <= score_thr, the left samples + are called valid samples. + 3. Use NMS-Match to divide valid samples into different groups, + samples in the same group will greatly overlap with each other + 4. Rank the matched samples in two-steps to get Score-HLR. + (1) In the same group, rank samples with their scores. + (2) In the same score rank across different groups, + rank samples with their scores again. + 5. Linearly map Score-HLR to the final label weights. + + Args: + assign_result (:obj:`AssignResult`): result of assigner. + num_expected (int): Expected number of samples. + bboxes (Tensor): bbox to be sampled. + feats (Tensor): Features come from FPN. + img_meta (dict): Meta information dictionary. 
+ """ + neg_inds = torch.nonzero(assign_result.gt_inds == 0).flatten() + num_neg = neg_inds.size(0) + if num_neg == 0: + return neg_inds, None + with torch.no_grad(): + neg_bboxes = bboxes[neg_inds] + neg_rois = bbox2roi([neg_bboxes]) + bbox_result = self.context._bbox_forward(feats, neg_rois) + cls_score, bbox_pred = bbox_result['cls_score'], bbox_result[ + 'bbox_pred'] + + ori_loss = self.bbox_head.loss( + cls_score=cls_score, + bbox_pred=None, + rois=None, + labels=neg_inds.new_full((num_neg, ), + self.bbox_head.num_classes), + label_weights=cls_score.new_ones(num_neg), + bbox_targets=None, + bbox_weights=None, + reduction_override='none')['loss_cls'] + + # filter out samples with the max score lower than score_thr + max_score, argmax_score = cls_score.softmax(-1)[:, :-1].max(-1) + valid_inds = (max_score > self.score_thr).nonzero().view(-1) + invalid_inds = (max_score <= self.score_thr).nonzero().view(-1) + num_valid = valid_inds.size(0) + num_invalid = invalid_inds.size(0) + + num_expected = min(num_neg, num_expected) + num_hlr = min(num_valid, num_expected) + num_rand = num_expected - num_hlr + if num_valid > 0: + valid_rois = neg_rois[valid_inds] + valid_max_score = max_score[valid_inds] + valid_argmax_score = argmax_score[valid_inds] + valid_bbox_pred = bbox_pred[valid_inds] + + # valid_bbox_pred shape: [num_valid, #num_classes, 4] + valid_bbox_pred = valid_bbox_pred.view( + valid_bbox_pred.size(0), -1, 4) + selected_bbox_pred = valid_bbox_pred[range(num_valid), + valid_argmax_score] + pred_bboxes = self.bbox_head.bbox_coder.decode( + valid_rois[:, 1:], selected_bbox_pred) + pred_bboxes_with_score = torch.cat( + [pred_bboxes, valid_max_score[:, None]], -1) + group = nms_match(pred_bboxes_with_score, self.iou_thr) + + # imp: importance + imp = cls_score.new_zeros(num_valid) + for g in group: + g_score = valid_max_score[g] + # g_score has already sorted + rank = g_score.new_tensor(range(g_score.size(0))) + imp[g] = num_valid - rank + g_score + _, 
imp_rank_inds = imp.sort(descending=True) + _, imp_rank = imp_rank_inds.sort() + hlr_inds = imp_rank_inds[:num_expected] + + if num_rand > 0: + rand_inds = torch.randperm(num_invalid)[:num_rand] + select_inds = torch.cat( + [valid_inds[hlr_inds], invalid_inds[rand_inds]]) + else: + select_inds = valid_inds[hlr_inds] + + neg_label_weights = cls_score.new_ones(num_expected) + + up_bound = max(num_expected, num_valid) + imp_weights = (up_bound - + imp_rank[hlr_inds].float()) / up_bound + neg_label_weights[:num_hlr] = imp_weights + neg_label_weights[num_hlr:] = imp_weights.min() + neg_label_weights = (self.bias + + (1 - self.bias) * neg_label_weights).pow( + self.k) + ori_selected_loss = ori_loss[select_inds] + new_loss = ori_selected_loss * neg_label_weights + norm_ratio = ori_selected_loss.sum() / new_loss.sum() + neg_label_weights *= norm_ratio + else: + neg_label_weights = cls_score.new_ones(num_expected) + select_inds = torch.randperm(num_neg)[:num_expected] + + return neg_inds[select_inds], neg_label_weights + + def sample(self, + assign_result, + bboxes, + gt_bboxes, + gt_labels=None, + img_meta=None, + **kwargs): + """Sample positive and negative bboxes. + + This is a simple implementation of bbox sampling given candidates, + assigning results and ground truth bboxes. + + Args: + assign_result (:obj:`AssignResult`): Bbox assigning results. + bboxes (Tensor): Boxes to be sampled from. + gt_bboxes (Tensor): Ground truth bboxes. + gt_labels (Tensor, optional): Class labels of ground truth bboxes. + + Returns: + tuple[:obj:`SamplingResult`, Tensor]: Sampling result and negetive + label weights. 
+ """ + bboxes = bboxes[:, :4] + + gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) + if self.add_gt_as_proposals: + bboxes = torch.cat([gt_bboxes, bboxes], dim=0) + assign_result.add_gt_(gt_labels) + gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) + gt_flags = torch.cat([gt_ones, gt_flags]) + + num_expected_pos = int(self.num * self.pos_fraction) + pos_inds = self.pos_sampler._sample_pos( + assign_result, num_expected_pos, bboxes=bboxes, **kwargs) + num_sampled_pos = pos_inds.numel() + num_expected_neg = self.num - num_sampled_pos + if self.neg_pos_ub >= 0: + _pos = max(1, num_sampled_pos) + neg_upper_bound = int(self.neg_pos_ub * _pos) + if num_expected_neg > neg_upper_bound: + num_expected_neg = neg_upper_bound + neg_inds, neg_label_weights = self.neg_sampler._sample_neg( + assign_result, + num_expected_neg, + bboxes, + img_meta=img_meta, + **kwargs) + + return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, + assign_result, gt_flags), neg_label_weights diff --git a/mmdet/core/bbox/transforms.py b/mmdet/core/bbox/transforms.py index b9d1e660..5dab183e 100644 --- a/mmdet/core/bbox/transforms.py +++ b/mmdet/core/bbox/transforms.py @@ -2,7 +2,6 @@ import numpy as np import torch - def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): assert proposals.size() == gt.size() @@ -31,6 +30,7 @@ def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): return deltas + def delta2bbox(rois, deltas, means=[0, 0, 0, 0], @@ -111,39 +111,54 @@ def delta2bbox(rois, return bboxes -def bbox_flip(bboxes, img_shape): - """Flip bboxes horizontally. + +def bbox_flip(bboxes, img_shape, direction='horizontal'): + """Flip bboxes horizontally or vertically. Args: - bboxes(Tensor or ndarray): Shape (..., 4*k) - img_shape(tuple): Image shape. + bboxes (Tensor): Shape (..., 4*k) + img_shape (tuple): Image shape. + direction (str): Flip direction, options are "horizontal" and + "vertical". 
Default: "horizontal" + Returns: - Same type as `bboxes`: Flipped bboxes. + Tensor: Flipped bboxes. """ - if isinstance(bboxes, torch.Tensor): - assert bboxes.shape[-1] % 4 == 0 - flipped = bboxes.clone() - flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] - 1 - flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] - 1 - return flipped - elif isinstance(bboxes, np.ndarray): - return mmcv.bbox_flip(bboxes, img_shape) - - -def bbox_mapping(bboxes, img_shape, scale_factor, flip): - """Map bboxes from the original image scale to testing scale""" - new_bboxes = bboxes * scale_factor + assert bboxes.shape[-1] % 4 == 0 + assert direction in ['horizontal', 'vertical'] + flipped = bboxes.clone() + if direction == 'vertical': + flipped[..., 1::4] = img_shape[0] - bboxes[..., 3::4] + flipped[..., 3::4] = img_shape[0] - bboxes[..., 1::4] + else: + flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] + flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] + return flipped + + +def bbox_mapping(bboxes, + img_shape, + scale_factor, + flip, + flip_direction='horizontal'): + """Map bboxes from the original image scale to testing scale.""" + new_bboxes = bboxes * bboxes.new_tensor(scale_factor) if flip: - new_bboxes = bbox_flip(new_bboxes, img_shape) + new_bboxes = bbox_flip(new_bboxes, img_shape, flip_direction) return new_bboxes -def bbox_mapping_back(bboxes, img_shape, scale_factor, flip): - """Map bboxes from testing scale to original image scale""" - new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes - new_bboxes = new_bboxes / scale_factor - return new_bboxes +def bbox_mapping_back(bboxes, + img_shape, + scale_factor, + flip, + flip_direction='horizontal'): + """Map bboxes from testing scale to original image scale.""" + new_bboxes = bbox_flip(bboxes, img_shape, + flip_direction) if flip else bboxes + new_bboxes = new_bboxes.view(-1, 4) / new_bboxes.new_tensor(scale_factor) + return new_bboxes.view(bboxes.shape) def bbox2roi(bbox_list): @@ -169,6 +184,15 @@ def 
bbox2roi(bbox_list): def roi2bbox(rois): + """Convert rois to bounding box format. + + Args: + rois (torch.Tensor): RoIs with the shape (n, 5) where the first + column indicates batch id of each RoI. + + Returns: + list[torch.Tensor]: Converted boxes of corresponding rois. + """ bbox_list = [] img_ids = torch.unique(rois[:, 0].cpu(), sorted=True) for img_id in img_ids: @@ -190,13 +214,11 @@ def bbox2result(bboxes, labels, num_classes): list(ndarray): bbox results of each class """ if bboxes.shape[0] == 0: - return [ - np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1) - ] + return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)] else: bboxes = bboxes.cpu().numpy() labels = labels.cpu().numpy() - return [bboxes[labels == i, :] for i in range(num_classes - 1)] + return [bboxes[labels == i, :] for i in range(num_classes)] def distance2bbox(points, distance, max_shape=None): @@ -216,8 +238,32 @@ def distance2bbox(points, distance, max_shape=None): x2 = points[:, 0] + distance[:, 2] y2 = points[:, 1] + distance[:, 3] if max_shape is not None: - x1 = x1.clamp(min=0, max=max_shape[1] - 1) - y1 = y1.clamp(min=0, max=max_shape[0] - 1) - x2 = x2.clamp(min=0, max=max_shape[1] - 1) - y2 = y2.clamp(min=0, max=max_shape[0] - 1) + x1 = x1.clamp(min=0, max=max_shape[1]) + y1 = y1.clamp(min=0, max=max_shape[0]) + x2 = x2.clamp(min=0, max=max_shape[1]) + y2 = y2.clamp(min=0, max=max_shape[0]) return torch.stack([x1, y1, x2, y2], -1) + + +def bbox2distance(points, bbox, max_dis=None, eps=0.1): + """Decode bounding box based on distances. + + Args: + points (Tensor): Shape (n, 2), [x, y]. + bbox (Tensor): Shape (n, 4), "xyxy" format + max_dis (float): Upper bound of the distance. + eps (float): a small value to ensure target < max_dis, instead <= + + Returns: + Tensor: Decoded distances. 
+ """ + left = points[:, 0] - bbox[:, 0] + top = points[:, 1] - bbox[:, 1] + right = bbox[:, 2] - points[:, 0] + bottom = bbox[:, 3] - points[:, 1] + if max_dis is not None: + left = left.clamp(min=0, max=max_dis - eps) + top = top.clamp(min=0, max=max_dis - eps) + right = right.clamp(min=0, max=max_dis - eps) + bottom = bottom.clamp(min=0, max=max_dis - eps) + return torch.stack([left, top, right, bottom], -1) diff --git a/mmdet/datasets/pipelines/transforms.py b/mmdet/datasets/pipelines/transforms.py index 69fad019..53c38a76 100644 --- a/mmdet/datasets/pipelines/transforms.py +++ b/mmdet/datasets/pipelines/transforms.py @@ -306,7 +306,7 @@ def _pad_masks(self, results): pad_shape = results['pad_shape'][:2] for key in results.get('mask_fields', []): padded_masks = [ - mmcv.impad(mask, pad_shape, pad_val=self.pad_val) + mmcv.impad(mask, shape=pad_shape, pad_val=self.pad_val) for mask in results[key] ] if padded_masks: diff --git a/mmdet/models/bbox_heads/double_bbox_head.py b/mmdet/models/bbox_heads/double_bbox_head.py index c5b66f0a..73b30456 100644 --- a/mmdet/models/bbox_heads/double_bbox_head.py +++ b/mmdet/models/bbox_heads/double_bbox_head.py @@ -1,5 +1,5 @@ import torch.nn as nn -from mmcv.cnn.weight_init import normal_init, xavier_init +from mmcv.cnn import normal_init, xavier_init from mmdet.ops import ConvModule from ..backbones.resnet import Bottleneck diff --git a/mmdet/models/losses/__init__.py b/mmdet/models/losses/__init__.py index 07731d71..438a7248 100644 --- a/mmdet/models/losses/__init__.py +++ b/mmdet/models/losses/__init__.py @@ -4,7 +4,7 @@ cross_entropy, mask_cross_entropy) from .focal_loss import FocalLoss, sigmoid_focal_loss from .ghm_loss import GHMC, GHMR -from .iou_loss import (BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss, +from .iou_loss import (BoundedIoULoss, DIoULoss, IoULoss, bounded_iou_loss, iou_loss) from .mse_loss import MSELoss, mse_loss from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss @@ -15,6 +15,6 @@ 
'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', - 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'GHMC', 'GHMR', 'reduce_loss', + 'IoULoss', 'BoundedIoULoss', 'DIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 'weight_reduce_loss', 'weighted_loss' ] diff --git a/mmdet/models/losses/iou_loss.py b/mmdet/models/losses/iou_loss.py index c19c1d1d..409c0fc0 100644 --- a/mmdet/models/losses/iou_loss.py +++ b/mmdet/models/losses/iou_loss.py @@ -69,34 +69,75 @@ def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3): return loss +# @weighted_loss +# def giou_loss(pred, target, eps=1e-7): +# """ +# Generalized Intersection over Union: A Metric and A Loss for +# Bounding Box Regression +# https://arxiv.org/abs/1902.09630 +# +# code refer to: +# https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py#L36 +# +# Args: +# pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), +# shape (n, 4). +# target (Tensor): Corresponding gt bboxes, shape (n, 4). +# eps (float): Eps to avoid log(0). +# +# Return: +# Tensor: Loss tensor. 
+# """ +# # overlap +# lt = torch.max(pred[:, :2], target[:, :2]) +# rb = torch.min(pred[:, 2:], target[:, 2:]) +# wh = (rb - lt + 1).clamp(min=0) +# overlap = wh[:, 0] * wh[:, 1] +# +# # union +# ap = (pred[:, 2] - pred[:, 0] + 1) * (pred[:, 3] - pred[:, 1] + 1) +# ag = (target[:, 2] - target[:, 0] + 1) * (target[:, 3] - target[:, 1] + 1) +# union = ap + ag - overlap + eps +# +# # IoU +# ious = overlap / union +# +# # enclose area +# enclose_x1y1 = torch.min(pred[:, :2], target[:, :2]) +# enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:]) +# enclose_wh = (enclose_x2y2 - enclose_x1y1 + 1).clamp(min=0) +# enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] + eps +# +# # GIoU +# gious = ious - (enclose_area - union) / enclose_area +# loss = 1 - gious +# return loss + + @weighted_loss -def giou_loss(pred, target, eps=1e-7): - """ - Generalized Intersection over Union: A Metric and A Loss for - Bounding Box Regression - https://arxiv.org/abs/1902.09630 +def diou_loss(pred, target, eps=1e-7): + r"""`Implementation of Distance-IoU Loss: Faster and Better + Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_. - code refer to: - https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py#L36 + Code is modified from https://github.com/Zzh-tju/DIoU. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): Corresponding gt bboxes, shape (n, 4). eps (float): Eps to avoid log(0). - Return: Tensor: Loss tensor. 
""" # overlap lt = torch.max(pred[:, :2], target[:, :2]) rb = torch.min(pred[:, 2:], target[:, 2:]) - wh = (rb - lt + 1).clamp(min=0) + wh = (rb - lt).clamp(min=0) overlap = wh[:, 0] * wh[:, 1] # union - ap = (pred[:, 2] - pred[:, 0] + 1) * (pred[:, 3] - pred[:, 1] + 1) - ag = (target[:, 2] - target[:, 0] + 1) * (target[:, 3] - target[:, 1] + 1) + ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1]) + ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1]) union = ap + ag - overlap + eps # IoU @@ -105,15 +146,29 @@ def giou_loss(pred, target, eps=1e-7): # enclose area enclose_x1y1 = torch.min(pred[:, :2], target[:, :2]) enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:]) - enclose_wh = (enclose_x2y2 - enclose_x1y1 + 1).clamp(min=0) - enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] + eps + enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0) + + cw = enclose_wh[:, 0] + ch = enclose_wh[:, 1] + + c2 = cw**2 + ch**2 + eps - # GIoU - gious = ious - (enclose_area - union) / enclose_area - loss = 1 - gious + b1_x1, b1_y1 = pred[:, 0], pred[:, 1] + b1_x2, b1_y2 = pred[:, 2], pred[:, 3] + b2_x1, b2_y1 = target[:, 0], target[:, 1] + b2_x2, b2_y2 = target[:, 2], target[:, 3] + + left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4 + right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4 + rho2 = left + right + + # DIoU + dious = ious - rho2 / c2 + loss = 1 - dious return loss + @LOSSES.register_module class IoULoss(nn.Module): @@ -180,11 +235,11 @@ def forward(self, return loss -@LOSSES.register_module -class GIoULoss(nn.Module): +@LOSSES.register_module() +class DIoULoss(nn.Module): def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0): - super(GIoULoss, self).__init__() + super(DIoULoss, self).__init__() self.eps = eps self.reduction = reduction self.loss_weight = loss_weight @@ -201,7 +256,13 @@ def forward(self, assert reduction_override in (None, 'none', 'mean', 'sum') reduction = ( reduction_override if reduction_override else self.reduction) 
- loss = self.loss_weight * giou_loss( + if weight is not None and weight.dim() > 1: + # TODO: remove this in the future + # reduce the weight of shape (n, 4) to (n,) to match the + # giou_loss of shape (n,) + assert weight.shape == pred.shape + weight = weight.mean(-1) + loss = self.loss_weight * diou_loss( pred, target, weight, @@ -210,3 +271,41 @@ def forward(self, avg_factor=avg_factor, **kwargs) return loss + + +# @LOSSES.register_module +# class GIoULoss(nn.Module): +# +# def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0): +# super(GIoULoss, self).__init__() +# self.eps = eps +# self.reduction = reduction +# self.loss_weight = loss_weight +# +# def forward(self, +# pred, +# target, +# weight=None, +# avg_factor=None, +# reduction_override=None, +# **kwargs): +# if weight is not None and not torch.any(weight > 0): +# return (pred * weight).sum() # 0 +# assert reduction_override in (None, 'none', 'mean', 'sum') +# reduction = ( +# reduction_override if reduction_override else self.reduction) +# if weight is not None and weight.dim() > 1: +# # TODO: remove this in the future +# # reduce the weight of shape (n, 4) to (n,) to match the +# # giou_loss of shape (n,) +# assert weight.shape == pred.shape +# weight = weight.mean(-1) +# loss = self.loss_weight * giou_loss( +# pred, +# target, +# weight, +# eps=self.eps, +# reduction=reduction, +# avg_factor=avg_factor, +# **kwargs) +# return loss diff --git a/mmdet/models/necks/hrfpn.py b/mmdet/models/necks/hrfpn.py index f8c466c6..4f735342 100644 --- a/mmdet/models/necks/hrfpn.py +++ b/mmdet/models/necks/hrfpn.py @@ -1,7 +1,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -from mmcv.cnn.weight_init import caffe2_xavier_init +from mmcv.cnn import caffe2_xavier_init from torch.utils.checkpoint import checkpoint from mmdet.ops import ConvModule diff --git a/mmdet/ops/conv_ws.py b/mmdet/ops/conv_ws.py index 1521cbc3..8bc26713 100644 --- a/mmdet/ops/conv_ws.py +++ b/mmdet/ops/conv_ws.py 
@@ -81,7 +81,7 @@ def _get_weight(self, weight): def forward(self, x): weight = self._get_weight(self.weight) - return super().conv2d_forward(x, weight) + return super()._conv_forward(x, weight) def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): diff --git a/mmdet/ops/nms/nms_wrapper.py b/mmdet/ops/nms/nms_wrapper.py index 145a218e..ac4389fe 100644 --- a/mmdet/ops/nms/nms_wrapper.py +++ b/mmdet/ops/nms/nms_wrapper.py @@ -51,7 +51,8 @@ def nms(dets, iou_thr, device_id=None): inds = dets_th.new_zeros(0, dtype=torch.long) else: if dets_th.is_cuda: - inds = nms_cuda.nms(dets_th, iou_thr) + # inds = nms_cuda.nms(dets_th, iou_thr) + inds = nms_cuda.nms(dets_th.cpu(), iou_thr).cuda() else: inds = nms_cpu.nms(dets_th, iou_thr) diff --git a/requirements/build.txt b/requirements/build.txt index 7db8afe8..68074e6e 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -1,3 +1,3 @@ # These must be installed before building mmdetection numpy -torch>=1.2,<=1.4 +torch==1.5.1 diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 5578282d..1c35e556 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -2,8 +2,8 @@ matplotlib mmcv>=0.3.1 numpy # need older pillow until torchvision is fixed -Pillow<=6.2.2 +Pillow==7.2.0 six terminaltables -torch>=1.2,<=1.4 +torch==1.5.1 torchvision