diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index 305034132..66007bd4e 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -277,6 +277,33 @@ priority=49) ] +autoanchor_hook = dict( + type='YOLOAutoAnchorHook', + optimizer=dict( + type='YOLOV5KMeansAnchorOptimizer', + iters=1000, + num_anchor_per_level=[3, 3, 3], + prior_match_thr=4.0, + mutation_args=[0.9, 0.1], + augment_args=[0.9, 1.1])) + +# You can comment out the existing autoanchor hook, +# and then select the autoanchor you want and uncomment it. + +# autoanchor_hook = dict( +# type='YOLOAutoAnchorHook', +# optimizer=dict( +# type='YOLOKMeansAnchorOptimizer', +# iters=1000, +# num_anchor_per_level=[3, 3, 3])) + +# autoanchor_hook = dict( +# type='YOLOAutoAnchorHook', +# optimizer=dict( +# type='YOLODEAnchorOptimizer', +# iters=1000, +# num_anchor_per_level=[3, 3, 3])) + val_evaluator = dict( type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), diff --git a/mmyolo/engine/hooks/__init__.py b/mmyolo/engine/hooks/__init__.py index 0b8deebc8..a16a2b9ed 100644 --- a/mmyolo/engine/hooks/__init__.py +++ b/mmyolo/engine/hooks/__init__.py @@ -1,10 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .ppyoloe_param_scheduler_hook import PPYOLOEParamSchedulerHook from .switch_to_deploy_hook import SwitchToDeployHook +from .yolo_auto_anchor_hook import YOLOAutoAnchorHook from .yolov5_param_scheduler_hook import YOLOv5ParamSchedulerHook from .yolox_mode_switch_hook import YOLOXModeSwitchHook __all__ = [ 'YOLOv5ParamSchedulerHook', 'YOLOXModeSwitchHook', 'SwitchToDeployHook', - 'PPYOLOEParamSchedulerHook' + 'PPYOLOEParamSchedulerHook', 'YOLOAutoAnchorHook' ] diff --git a/mmyolo/engine/hooks/yolo_auto_anchor_hook.py b/mmyolo/engine/hooks/yolo_auto_anchor_hook.py new file mode 100644 index 000000000..b30db8719 --- /dev/null +++ b/mmyolo/engine/hooks/yolo_auto_anchor_hook.py @@ -0,0 +1,99 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmengine.dist import broadcast, get_dist_info +from mmengine.hooks import Hook +from mmengine.logging import MMLogger +from mmengine.model import is_model_wrapper +from mmengine.runner import Runner + +from mmyolo.registry import HOOKS, TASK_UTILS + + +@HOOKS.register_module() +class YOLOAutoAnchorHook(Hook): + + priority = 48 + + # YOLOAutoAnchorHook takes priority over EMAHook. 
+ + def __init__(self, optimizer): + + self.optimizer = optimizer + print('YOLOAutoAnchorHook should take priority over EMAHook, ' + 'the default priority of EMAHook is 49, so the priority of ' + 'YOLOAutoAnchorHook is 48') + + def before_run(self, runner) -> None: + + model = runner.model + if is_model_wrapper(model): + model = model.module + + device = next(model.parameters()).device + anchors = torch.tensor( + model.bbox_head.prior_generator.base_sizes, device=device) + model.register_buffer('anchors', anchors) + + def before_train(self, runner: Runner) -> None: + + if runner.iter > 0: + return + model = runner.model + if is_model_wrapper(model): + model = model.module + print('begin reloading optimized anchors') + + rank, _ = get_dist_info() + + weights = model.state_dict() + anchors_tensor = weights['anchors'] + if rank == 0 and not runner._has_loaded: + runner_dataset = runner.train_dataloader.dataset + self.optimizer.update( + dataset=runner_dataset, + device=runner_dataset[0]['inputs'].device, + input_shape=runner.cfg['img_scale'], + logger=MMLogger.get_current_instance()) + + optimizer = TASK_UTILS.build(self.optimizer) + anchors = optimizer.optimize() + device = next(model.parameters()).device + anchors_tensor = torch.tensor(anchors, device=device) + + broadcast(anchors_tensor) + weights['anchors'] = anchors_tensor + model.load_state_dict(weights) + + self.reinitialize(runner, model) + + def before_val(self, runner: Runner) -> None: + + model = runner.model + if is_model_wrapper(model): + model = model.module + print('begin reloading optimized anchors') + self.reinitialize(runner, model) + + def before_test(self, runner: Runner) -> None: + + model = runner.model + if is_model_wrapper(model): + model = model.module + print('begin reloading optimized anchors') + self.reinitialize(runner, model) + + def reinitialize(self, runner: Runner, model) -> None: + anchors_tensor = model.state_dict()['anchors'] + base_sizes = anchors_tensor.tolist() + device = 
anchors_tensor.device + prior_generator = runner.cfg.model.bbox_head.prior_generator + prior_generator.update(base_sizes=base_sizes) + + model.bbox_head.prior_generator = TASK_UTILS.build(prior_generator) + + priors_base_sizes = torch.tensor( + base_sizes, dtype=torch.float, device=device) + featmap_strides = torch.tensor( + model.bbox_head.featmap_strides, dtype=torch.float, + device=device)[:, None, None] + model.bbox_head.priors_base_sizes = priors_base_sizes / featmap_strides diff --git a/mmyolo/utils/__init__.py b/mmyolo/utils/__init__.py index f4e968494..d1547fb88 100644 --- a/mmyolo/utils/__init__.py +++ b/mmyolo/utils/__init__.py @@ -1,9 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. +from .anchor_optimizers import (YOLODEAnchorOptimizer, + YOLOKMeansAnchorOptimizer, + YOLOV5KMeansAnchorOptimizer) from .collect_env import collect_env from .misc import is_metainfo_lower, switch_to_deploy from .setup_env import register_all_modules __all__ = [ 'register_all_modules', 'collect_env', 'switch_to_deploy', - 'is_metainfo_lower' + 'is_metainfo_lower', 'YOLOKMeansAnchorOptimizer', + 'YOLOV5KMeansAnchorOptimizer', 'YOLODEAnchorOptimizer' ] diff --git a/mmyolo/utils/anchor_optimizers.py b/mmyolo/utils/anchor_optimizers.py new file mode 100644 index 000000000..910775707 --- /dev/null +++ b/mmyolo/utils/anchor_optimizers.py @@ -0,0 +1,526 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Optimize anchor settings on a specific dataset. + +This script provides three methods to optimize YOLO anchors including k-means +anchor cluster, differential evolution and v5-k-means. You can use +``--algorithm k-means``, ``--algorithm differential_evolution`` and +``--algorithm v5-k-means`` to switch those methods. 
+ +Example: + Use k-means anchor cluster:: + + python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ + --algorithm k-means --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ + --out-dir ${OUT_DIR} + + Use differential evolution to optimize anchors:: + + python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ + --algorithm differential_evolution \ + --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ + --out-dir ${OUT_DIR} + + Use v5-k-means to optimize anchors:: + + python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ + --algorithm v5-k-means \ + --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ + --prior_match_thr ${PRIOR_MATCH_THR} \ + --out-dir ${OUT_DIR} +""" +import os.path as osp +import random +from typing import Tuple + +import numpy as np +import torch +from mmdet.structures.bbox import (bbox_cxcywh_to_xyxy, bbox_overlaps, + bbox_xyxy_to_cxcywh) +from mmengine.fileio import dump +from mmengine.utils import ProgressBar +from scipy.optimize import differential_evolution +from torch import Tensor + +from mmyolo.registry import TASK_UTILS + +try: + from scipy.cluster.vq import kmeans +except ImportError: + kmeans = None + + +@TASK_UTILS.register_module() +class BaseAnchorOptimizer: + """Base class for anchor optimizer. + + Args: + dataset (obj:`Dataset`): Dataset object. + input_shape (list[int]): Input image shape of the model. + Format in [width, height]. + num_anchor_per_level (list[int]) : Number of anchors for each level. + logger (obj:`logging.Logger`): The logger for logging. + device (str, optional): Device used for calculating. + Default: 'cuda:0' + out_dir (str, optional): Path to save anchor optimize result. 
+ Default: None + """ + + def __init__(self, + dataset, + input_shape, + num_anchor_per_level, + logger, + device='cuda:0', + out_dir=None): + self.dataset = dataset + self.input_shape = input_shape + self.num_anchor_per_level = num_anchor_per_level + self.num_anchors = sum(num_anchor_per_level) + self.logger = logger + self.device = device + self.out_dir = out_dir + bbox_whs, img_shapes = self.get_whs_and_shapes() + ratios = img_shapes.max(1, keepdims=True) / np.array([input_shape]) + + # resize to input shape + self.bbox_whs = bbox_whs / ratios + + def get_whs_and_shapes(self): + """Get widths and heights of bboxes and shapes of images. + + Returns: + tuple[np.ndarray]: Array of bbox shapes and array of image + shapes with shape (num_bboxes, 2) in [width, height] format. + """ + self.logger.info('Collecting bboxes from annotation...') + bbox_whs = [] + img_shapes = [] + prog_bar = ProgressBar(len(self.dataset)) + for idx in range(len(self.dataset)): + data_info = self.dataset.get_data_info(idx) + img_shape = np.array([data_info['width'], data_info['height']]) + gt_instances = data_info['instances'] + for instance in gt_instances: + bbox = np.array(instance['bbox']) + gt_filter_sizes = bbox[2:4] - bbox[0:2] + img_shapes.append(img_shape) + bbox_whs.append(gt_filter_sizes) + + prog_bar.update() + print('\n') + bbox_whs = np.array(bbox_whs) + img_shapes = np.array(img_shapes) + self.logger.info(f'Collected {bbox_whs.shape[0]} bboxes.') + return bbox_whs, img_shapes + + def get_zero_center_bbox_tensor(self): + """Get a tensor of bboxes centered at (0, 0). + + Returns: + Tensor: Tensor of bboxes with shape (num_bboxes, 4) + in [xmin, ymin, xmax, ymax] format. 
+ """ + whs = torch.from_numpy(self.bbox_whs).to( + self.device, dtype=torch.float32) + bboxes = bbox_cxcywh_to_xyxy( + torch.cat([torch.zeros_like(whs), whs], dim=1)) + return bboxes + + def optimize(self): + raise NotImplementedError + + def save_result(self, anchors, path=None): + + anchor_results = [] + start = 0 + for num in self.num_anchor_per_level: + end = num + start + anchor_results.append([(round(w), round(h)) + for w, h in anchors[start:end]]) + start = end + + self.logger.info(f'Anchor optimize result:{anchor_results}') + if path: + json_path = osp.join(path, 'anchor_optimize_result.json') + dump(anchor_results, json_path) + self.logger.info(f'Result saved in {json_path}') + return anchor_results + + +@TASK_UTILS.register_module() +class YOLOKMeansAnchorOptimizer(BaseAnchorOptimizer): + r"""YOLO anchor optimizer using k-means. Code refer to `AlexeyAB/darknet. + `_. + + Args: + iters (int): Maximum iterations for k-means. + """ + + def __init__(self, iters, **kwargs): + + super().__init__(**kwargs) + self.iters = iters + + def optimize(self): + anchors = self.kmeans_anchors() + anchor_results = self.save_result(anchors, self.out_dir) + return anchor_results + + def kmeans_anchors(self): + self.logger.info( + f'Start cluster {self.num_anchors} YOLO anchors with K-means...') + bboxes = self.get_zero_center_bbox_tensor() + cluster_center_idx = torch.randint( + 0, bboxes.shape[0], (self.num_anchors, )).to(self.device) + + assignments = torch.zeros((bboxes.shape[0], )).to(self.device) + cluster_centers = bboxes[cluster_center_idx] + if self.num_anchors == 1: + cluster_centers = self.kmeans_maximization(bboxes, assignments, + cluster_centers) + anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy() + anchors = sorted(anchors, key=lambda x: x[0] * x[1]) + return anchors + + prog_bar = ProgressBar(self.iters) + for i in range(self.iters): + converged, assignments = self.kmeans_expectation( + bboxes, assignments, cluster_centers) + if converged: + 
self.logger.info(f'K-means process has converged at iter {i}.') + break + cluster_centers = self.kmeans_maximization(bboxes, assignments, + cluster_centers) + prog_bar.update() + print('\n') + avg_iou = bbox_overlaps(bboxes, + cluster_centers).max(1)[0].mean().item() + + anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy() + anchors = sorted(anchors, key=lambda x: x[0] * x[1]) + self.logger.info(f'Anchor cluster finish. Average IOU: {avg_iou}') + + return anchors + + def kmeans_maximization(self, bboxes, assignments, centers): + """Maximization part of EM algorithm(Expectation-Maximization)""" + new_centers = torch.zeros_like(centers) + for i in range(centers.shape[0]): + mask = (assignments == i) + if mask.sum(): + new_centers[i, :] = bboxes[mask].mean(0) + return new_centers + + def kmeans_expectation(self, bboxes, assignments, centers): + """Expectation part of EM algorithm(Expectation-Maximization)""" + ious = bbox_overlaps(bboxes, centers) + closest = ious.argmax(1) + converged = (closest == assignments).all() + return converged, closest + + +@TASK_UTILS.register_module() +class YOLOV5KMeansAnchorOptimizer(BaseAnchorOptimizer): + r"""YOLOv5 anchor optimizer using shape k-means. + Code refer to `ultralytics/yolov5. + `_. + + Args: + iters (int): Maximum iterations for k-means. + prior_match_thr (float): anchor-label width height + ratio threshold hyperparameter. 
+ """ + + def __init__(self, + iters, + prior_match_thr=4.0, + mutation_args=[0.9, 0.1], + augment_args=[0.9, 1.1], + **kwargs): + + super().__init__(**kwargs) + self.iters = iters + self.prior_match_thr = prior_match_thr + [self.mutation_prob, self.mutation_sigma] = mutation_args + [self.augment_min, self.augment_max] = augment_args + + def optimize(self): + self.logger.info( + f'Start cluster {self.num_anchors} YOLOv5 anchors with K-means...') + + bbox_whs = torch.from_numpy(self.bbox_whs).to( + self.device, dtype=torch.float32) + anchors = self.anchor_generate( + bbox_whs, + num=self.num_anchors, + img_size=self.input_shape[0], + prior_match_thr=self.prior_match_thr, + iters=self.iters) + best_ratio, mean_matched = self.anchor_metric(bbox_whs, anchors) + self.logger.info(f'{mean_matched:.2f} anchors/target {best_ratio:.3f} ' + 'Best Possible Recall (BPR). ') + anchor_results = self.save_result(anchors.tolist(), self.out_dir) + return anchor_results + + def anchor_generate(self, + box_size: Tensor, + num: int = 9, + img_size: int = 640, + prior_match_thr: float = 4.0, + iters: int = 1000) -> Tensor: + """cluster boxes metric with anchors. + + Args: + box_size (Tensor): The size of the bxes, which shape is + (box_num, 2),the number 2 means width and height. + num (int): number of anchors. 
+ img_size (int): image size used for training + prior_match_thr (float): width/height ratio threshold + used for training + iters (int): iterations to evolve anchors using genetic algorithm + + Returns: + anchors (Tensor): kmeans evolved anchors + """ + + thr = 1 / prior_match_thr + + # step1: filter small bbox + box_size = self._filter_box(box_size) + assert num <= len(box_size) + + # step2: init anchors + if kmeans: + try: + self.logger.info( + 'beginning init anchors with scipy kmeans method') + # sigmas for whitening + sigmas = box_size.std(0).cpu().numpy() + anchors = kmeans( + box_size.cpu().numpy() / sigmas, num, iter=30)[0] * sigmas + # kmeans may return fewer points than requested + # if width/height is insufficient or too similar + assert num == len(anchors) + except Exception: + self.logger.warning( + 'scipy kmeans method cannot get enough points ' + 'because of width/height is insufficient or too similar, ' + 'now switching strategies from kmeans to random init.') + anchors = np.sort(np.random.rand(num * 2)).reshape( + num, 2) * img_size + else: + self.logger.info( + 'cannot found scipy package, switching strategies from kmeans ' + 'to random init, you can install scipy package to ' + 'get better anchor init') + anchors = np.sort(np.random.rand(num * 2)).reshape(num, + 2) * img_size + + self.logger.info('init done, beginning evolve anchors...') + # sort small to large + anchors = torch.tensor(anchors[np.argsort(anchors.prod(1))]).to( + box_size.device, dtype=torch.float32) + + # step3: evolve anchors use Genetic Algorithm + prog_bar = ProgressBar(iters) + fitness = self._anchor_fitness(box_size, anchors, thr) + cluster_shape = anchors.shape + + for _ in range(iters): + mutate_result = np.ones(cluster_shape) + # mutate until a change occurs (prevent duplicates) + while (mutate_result == 1).all(): + # mutate_result is scale factor of anchors, between 0.3 and 3 + mutate_result = ( + (np.random.random(cluster_shape) < self.mutation_prob) * + 
random.random() * np.random.randn(*cluster_shape) * + self.mutation_sigma + 1).clip(0.3, 3.0) + mutate_result = torch.from_numpy(mutate_result).to(box_size.device) + new_anchors = (anchors.clone() * mutate_result).clip(min=2.0) + new_fitness = self._anchor_fitness(box_size, new_anchors, thr) + if new_fitness > fitness: + fitness = new_fitness + anchors = new_anchors.clone() + + prog_bar.update() + print('\n') + # sort small to large + anchors = anchors[torch.argsort(anchors.prod(1))] + self.logger.info(f'Anchor cluster finish. fitness = {fitness:.4f}') + + return anchors + + def anchor_metric(self, + box_size: Tensor, + anchors: Tensor, + threshold: float = 4.0) -> Tuple: + """compute boxes metric with anchors. + + Args: + box_size (Tensor): The size of the boxes, which shape + is (box_num, 2), the number 2 means width and height. + anchors (Tensor): The size of the anchors, which shape + is (anchor_num, 2), the number 2 means width and height. + threshold (float): the compare threshold of ratio + + Returns: + Tuple: a tuple of metric result, best_ratio_mean and mean_matched + """ + # step1: augment scale + # Following a uniform distribution, a scaling factor between + # augment_min and augment_max is randomly generated + scale = np.random.uniform( + self.augment_min, self.augment_max, size=(box_size.shape[0], 1)) + box_size = torch.tensor( + np.array( + [l[:, ] * s for s, l in zip(scale, + box_size.cpu().numpy())])).to( + box_size.device, + dtype=torch.float32) + # step2: calculate ratio + min_ratio, best_ratio = self._metric(box_size, anchors) + mean_matched = (min_ratio > 1 / threshold).float().sum(1).mean() + best_ratio_mean = (best_ratio > 1 / threshold).float().mean() + return best_ratio_mean, mean_matched + + def _filter_box(self, box_size: Tensor) -> Tensor: + small_cnt = (box_size < 3.0).any(1).sum() + if small_cnt: + self.logger.warning( + f'Extremely small objects found: {small_cnt} ' + f'of {len(box_size)} labels are <3 pixels in size') + # filter > 2 
pixels + filter_sizes = box_size[(box_size >= 2.0).any(1)] + return filter_sizes + + def _anchor_fitness(self, box_size: Tensor, anchors: Tensor, thr: float): + """mutation fitness.""" + _, best = self._metric(box_size, anchors) + return (best * (best > thr).float()).mean() + + def _metric(self, box_size: Tensor, anchors: Tensor) -> Tuple: + """compute boxes metric with anchors. + + Args: + box_size (Tensor): The size of the boxes, which shape is + (box_num, 2), the number 2 means width and height. + anchors (Tensor): The size of the anchors, which shape is + (anchor_num, 2), the number 2 means width and height. + + Returns: + Tuple: a tuple of metric result, min_ratio and best_ratio + """ + + # ratio means the (width_1/width_2 and height_1/height_2) ratio of each + # box and anchor, the ratio shape is torch.Size([box_num,anchor_num,2]) + ratio = box_size[:, None] / anchors[None] + + # min_ratio records the min ratio of each box with each anchor, + # min_ratio.shape is torch.Size([box_num,anchor_num]) + # notice: + # smaller ratio means worse shape-match between boxes and anchors + min_ratio = torch.min(ratio, 1 / ratio).min(2)[0] + + # find the best shape-match ratio for each box + # best_ratio.shape is torch.Size([box_num]) + best_ratio = min_ratio.max(1)[0] + + return min_ratio, best_ratio + + +@TASK_UTILS.register_module() +class YOLODEAnchorOptimizer(BaseAnchorOptimizer): + """YOLO anchor optimizer using differential evolution algorithm. + + Args: + iters (int): Maximum iterations for differential evolution. + strategy (str): The differential evolution strategy to use. + Should be one of: + + - 'best1bin' + - 'best1exp' + - 'rand1exp' + - 'randtobest1exp' + - 'currenttobest1exp' + - 'best2exp' + - 'rand2exp' + - 'randtobest1bin' + - 'currenttobest1bin' + - 'best2bin' + - 'rand2bin' + - 'rand1bin' + + Default: 'best1bin'. + population_size (int): Total population size of evolution algorithm. + Default: 15. 
+ convergence_thr (float): Tolerance for convergence, the + optimizing stops when ``np.std(pop) <= abs(convergence_thr) + + convergence_thr * np.abs(np.mean(population_energies))``, + respectively. Default: 0.0001. + mutation (tuple[float]): Range of dithering randomly changes the + mutation constant. Default: (0.5, 1). + recombination (float): Recombination constant of crossover probability. + Default: 0.7. + """ + + def __init__(self, + iters, + strategy='best1bin', + population_size=15, + convergence_thr=0.0001, + mutation=(0.5, 1), + recombination=0.7, + **kwargs): + + super().__init__(**kwargs) + + self.iters = iters + self.strategy = strategy + self.population_size = population_size + self.convergence_thr = convergence_thr + self.mutation = mutation + self.recombination = recombination + + def optimize(self): + anchors = self.differential_evolution() + anchor_results = self.save_result(anchors, self.out_dir) + return anchor_results + + def differential_evolution(self): + bboxes = self.get_zero_center_bbox_tensor() + + bounds = [] + for i in range(self.num_anchors): + bounds.extend([(0, self.input_shape[0]), (0, self.input_shape[1])]) + + result = differential_evolution( + func=self.avg_iou_cost, + bounds=bounds, + args=(bboxes, ), + strategy=self.strategy, + maxiter=self.iters, + popsize=self.population_size, + tol=self.convergence_thr, + mutation=self.mutation, + recombination=self.recombination, + updating='immediate', + disp=True) + self.logger.info( + f'Anchor evolution finish. 
Average IOU: {1 - result.fun}') + anchors = [(w, h) for w, h in zip(result.x[::2], result.x[1::2])] + anchors = sorted(anchors, key=lambda x: x[0] * x[1]) + return anchors + + @staticmethod + def avg_iou_cost(anchor_params, bboxes): + assert len(anchor_params) % 2 == 0 + anchor_whs = torch.tensor( + [[w, h] + for w, h in zip(anchor_params[::2], anchor_params[1::2])]).to( + bboxes.device, dtype=bboxes.dtype) + anchor_boxes = bbox_cxcywh_to_xyxy( + torch.cat([torch.zeros_like(anchor_whs), anchor_whs], dim=1)) + ious = bbox_overlaps(bboxes, anchor_boxes) + max_ious, _ = ious.max(1) + cost = 1 - max_ious.mean().item() + return cost diff --git a/tools/analysis_tools/optimize_anchors.py b/tools/analysis_tools/optimize_anchors.py index 34d4d067a..689c8f64a 100644 --- a/tools/analysis_tools/optimize_anchors.py +++ b/tools/analysis_tools/optimize_anchors.py @@ -29,29 +29,15 @@ --out-dir ${OUT_DIR} """ import argparse -import os.path as osp -import random -from typing import Tuple -import numpy as np -import torch -from mmdet.structures.bbox import (bbox_cxcywh_to_xyxy, bbox_overlaps, - bbox_xyxy_to_cxcywh) from mmdet.utils import replace_cfg_vals, update_data_root from mmengine.config import Config -from mmengine.fileio import dump from mmengine.logging import MMLogger from mmengine.registry import init_default_scope -from mmengine.utils import ProgressBar -from scipy.optimize import differential_evolution -from torch import Tensor from mmyolo.registry import DATASETS - -try: - from scipy.cluster.vq import kmeans -except ImportError: - kmeans = None +from mmyolo.utils import (YOLODEAnchorOptimizer, YOLOKMeansAnchorOptimizer, + YOLOV5KMeansAnchorOptimizer) def parse_args(): @@ -107,475 +93,6 @@ def parse_args(): return args -class BaseAnchorOptimizer: - """Base class for anchor optimizer. - - Args: - dataset (obj:`Dataset`): Dataset object. - input_shape (list[int]): Input image shape of the model. - Format in [width, height]. 
- num_anchor_per_level (list[int]) : Number of anchors for each level. - logger (obj:`logging.Logger`): The logger for logging. - device (str, optional): Device used for calculating. - Default: 'cuda:0' - out_dir (str, optional): Path to save anchor optimize result. - Default: None - """ - - def __init__(self, - dataset, - input_shape, - num_anchor_per_level, - logger, - device='cuda:0', - out_dir=None): - self.dataset = dataset - self.input_shape = input_shape - self.num_anchor_per_level = num_anchor_per_level - self.num_anchors = sum(num_anchor_per_level) - self.logger = logger - self.device = device - self.out_dir = out_dir - bbox_whs, img_shapes = self.get_whs_and_shapes() - ratios = img_shapes.max(1, keepdims=True) / np.array([input_shape]) - - # resize to input shape - self.bbox_whs = bbox_whs / ratios - - def get_whs_and_shapes(self): - """Get widths and heights of bboxes and shapes of images. - - Returns: - tuple[np.ndarray]: Array of bbox shapes and array of image - shapes with shape (num_bboxes, 2) in [width, height] format. - """ - self.logger.info('Collecting bboxes from annotation...') - bbox_whs = [] - img_shapes = [] - prog_bar = ProgressBar(len(self.dataset)) - for idx in range(len(self.dataset)): - data_info = self.dataset.get_data_info(idx) - img_shape = np.array([data_info['width'], data_info['height']]) - gt_instances = data_info['instances'] - for instance in gt_instances: - bbox = np.array(instance['bbox']) - gt_filter_sizes = bbox[2:4] - bbox[0:2] - img_shapes.append(img_shape) - bbox_whs.append(gt_filter_sizes) - - prog_bar.update() - print('\n') - bbox_whs = np.array(bbox_whs) - img_shapes = np.array(img_shapes) - self.logger.info(f'Collected {bbox_whs.shape[0]} bboxes.') - return bbox_whs, img_shapes - - def get_zero_center_bbox_tensor(self): - """Get a tensor of bboxes centered at (0, 0). - - Returns: - Tensor: Tensor of bboxes with shape (num_bboxes, 4) - in [xmin, ymin, xmax, ymax] format. 
- """ - whs = torch.from_numpy(self.bbox_whs).to( - self.device, dtype=torch.float32) - bboxes = bbox_cxcywh_to_xyxy( - torch.cat([torch.zeros_like(whs), whs], dim=1)) - return bboxes - - def optimize(self): - raise NotImplementedError - - def save_result(self, anchors, path=None): - - anchor_results = [] - start = 0 - for num in self.num_anchor_per_level: - end = num + start - anchor_results.append([(round(w), round(h)) - for w, h in anchors[start:end]]) - start = end - - self.logger.info(f'Anchor optimize result:{anchor_results}') - if path: - json_path = osp.join(path, 'anchor_optimize_result.json') - dump(anchor_results, json_path) - self.logger.info(f'Result saved in {json_path}') - - -class YOLOKMeansAnchorOptimizer(BaseAnchorOptimizer): - r"""YOLO anchor optimizer using k-means. Code refer to `AlexeyAB/darknet. - `_. - - Args: - iters (int): Maximum iterations for k-means. - """ - - def __init__(self, iters, **kwargs): - - super().__init__(**kwargs) - self.iters = iters - - def optimize(self): - anchors = self.kmeans_anchors() - self.save_result(anchors, self.out_dir) - - def kmeans_anchors(self): - self.logger.info( - f'Start cluster {self.num_anchors} YOLO anchors with K-means...') - bboxes = self.get_zero_center_bbox_tensor() - cluster_center_idx = torch.randint( - 0, bboxes.shape[0], (self.num_anchors, )).to(self.device) - - assignments = torch.zeros((bboxes.shape[0], )).to(self.device) - cluster_centers = bboxes[cluster_center_idx] - if self.num_anchors == 1: - cluster_centers = self.kmeans_maximization(bboxes, assignments, - cluster_centers) - anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy() - anchors = sorted(anchors, key=lambda x: x[0] * x[1]) - return anchors - - prog_bar = ProgressBar(self.iters) - for i in range(self.iters): - converged, assignments = self.kmeans_expectation( - bboxes, assignments, cluster_centers) - if converged: - self.logger.info(f'K-means process has converged at iter {i}.') - break - cluster_centers = 
self.kmeans_maximization(bboxes, assignments, - cluster_centers) - prog_bar.update() - print('\n') - avg_iou = bbox_overlaps(bboxes, - cluster_centers).max(1)[0].mean().item() - - anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy() - anchors = sorted(anchors, key=lambda x: x[0] * x[1]) - self.logger.info(f'Anchor cluster finish. Average IOU: {avg_iou}') - - return anchors - - def kmeans_maximization(self, bboxes, assignments, centers): - """Maximization part of EM algorithm(Expectation-Maximization)""" - new_centers = torch.zeros_like(centers) - for i in range(centers.shape[0]): - mask = (assignments == i) - if mask.sum(): - new_centers[i, :] = bboxes[mask].mean(0) - return new_centers - - def kmeans_expectation(self, bboxes, assignments, centers): - """Expectation part of EM algorithm(Expectation-Maximization)""" - ious = bbox_overlaps(bboxes, centers) - closest = ious.argmax(1) - converged = (closest == assignments).all() - return converged, closest - - -class YOLOV5KMeansAnchorOptimizer(BaseAnchorOptimizer): - r"""YOLOv5 anchor optimizer using shape k-means. - Code refer to `ultralytics/yolov5. - `_. - - Args: - iters (int): Maximum iterations for k-means. - prior_match_thr (float): anchor-label width height - ratio threshold hyperparameter. 
- """ - - def __init__(self, - iters, - prior_match_thr=4.0, - mutation_args=[0.9, 0.1], - augment_args=[0.9, 1.1], - **kwargs): - - super().__init__(**kwargs) - self.iters = iters - self.prior_match_thr = prior_match_thr - [self.mutation_prob, self.mutation_sigma] = mutation_args - [self.augment_min, self.augment_max] = augment_args - - def optimize(self): - self.logger.info( - f'Start cluster {self.num_anchors} YOLOv5 anchors with K-means...') - - bbox_whs = torch.from_numpy(self.bbox_whs).to( - self.device, dtype=torch.float32) - anchors = self.anchor_generate( - bbox_whs, - num=self.num_anchors, - img_size=self.input_shape[0], - prior_match_thr=self.prior_match_thr, - iters=self.iters) - best_ratio, mean_matched = self.anchor_metric(bbox_whs, anchors) - self.logger.info(f'{mean_matched:.2f} anchors/target {best_ratio:.3f} ' - 'Best Possible Recall (BPR). ') - self.save_result(anchors.tolist(), self.out_dir) - - def anchor_generate(self, - box_size: Tensor, - num: int = 9, - img_size: int = 640, - prior_match_thr: float = 4.0, - iters: int = 1000) -> Tensor: - """cluster boxes metric with anchors. - - Args: - box_size (Tensor): The size of the bxes, which shape is - (box_num, 2),the number 2 means width and height. - num (int): number of anchors. 
- img_size (int): image size used for training - prior_match_thr (float): width/height ratio threshold - used for training - iters (int): iterations to evolve anchors using genetic algorithm - - Returns: - anchors (Tensor): kmeans evolved anchors - """ - - thr = 1 / prior_match_thr - - # step1: filter small bbox - box_size = self._filter_box(box_size) - assert num <= len(box_size) - - # step2: init anchors - if kmeans: - try: - self.logger.info( - 'beginning init anchors with scipy kmeans method') - # sigmas for whitening - sigmas = box_size.std(0).cpu().numpy() - anchors = kmeans( - box_size.cpu().numpy() / sigmas, num, iter=30)[0] * sigmas - # kmeans may return fewer points than requested - # if width/height is insufficient or too similar - assert num == len(anchors) - except Exception: - self.logger.warning( - 'scipy kmeans method cannot get enough points ' - 'because of width/height is insufficient or too similar, ' - 'now switching strategies from kmeans to random init.') - anchors = np.sort(np.random.rand(num * 2)).reshape( - num, 2) * img_size - else: - self.logger.info( - 'cannot found scipy package, switching strategies from kmeans ' - 'to random init, you can install scipy package to ' - 'get better anchor init') - anchors = np.sort(np.random.rand(num * 2)).reshape(num, - 2) * img_size - - self.logger.info('init done, beginning evolve anchors...') - # sort small to large - anchors = torch.tensor(anchors[np.argsort(anchors.prod(1))]).to( - box_size.device, dtype=torch.float32) - - # step3: evolve anchors use Genetic Algorithm - prog_bar = ProgressBar(iters) - fitness = self._anchor_fitness(box_size, anchors, thr) - cluster_shape = anchors.shape - - for _ in range(iters): - mutate_result = np.ones(cluster_shape) - # mutate until a change occurs (prevent duplicates) - while (mutate_result == 1).all(): - # mutate_result is scale factor of anchors, between 0.3 and 3 - mutate_result = ( - (np.random.random(cluster_shape) < self.mutation_prob) * - 
random.random() * np.random.randn(*cluster_shape) * - self.mutation_sigma + 1).clip(0.3, 3.0) - mutate_result = torch.from_numpy(mutate_result).to(box_size.device) - new_anchors = (anchors.clone() * mutate_result).clip(min=2.0) - new_fitness = self._anchor_fitness(box_size, new_anchors, thr) - if new_fitness > fitness: - fitness = new_fitness - anchors = new_anchors.clone() - - prog_bar.update() - print('\n') - # sort small to large - anchors = anchors[torch.argsort(anchors.prod(1))] - self.logger.info(f'Anchor cluster finish. fitness = {fitness:.4f}') - - return anchors - - def anchor_metric(self, - box_size: Tensor, - anchors: Tensor, - threshold: float = 4.0) -> Tuple: - """compute boxes metric with anchors. - - Args: - box_size (Tensor): The size of the bxes, which shape - is (box_num, 2), the number 2 means width and height. - anchors (Tensor): The size of the bxes, which shape - is (anchor_num, 2), the number 2 means width and height. - threshold (float): the compare threshold of ratio - - Returns: - Tuple: a tuple of metric result, best_ratio_mean and mean_matched - """ - # step1: augment scale - # According to the uniform distribution,the scaling scale between - # augment_min and augment_max is randomly generated - scale = np.random.uniform( - self.augment_min, self.augment_max, size=(box_size.shape[0], 1)) - box_size = torch.tensor( - np.array( - [l[:, ] * s for s, l in zip(scale, - box_size.cpu().numpy())])).to( - box_size.device, - dtype=torch.float32) - # step2: calculate ratio - min_ratio, best_ratio = self._metric(box_size, anchors) - mean_matched = (min_ratio > 1 / threshold).float().sum(1).mean() - best_ratio_mean = (best_ratio > 1 / threshold).float().mean() - return best_ratio_mean, mean_matched - - def _filter_box(self, box_size: Tensor) -> Tensor: - small_cnt = (box_size < 3.0).any(1).sum() - if small_cnt: - self.logger.warning( - f'Extremely small objects found: {small_cnt} ' - f'of {len(box_size)} labels are <3 pixels in size') - # filter > 2 
pixels - filter_sizes = box_size[(box_size >= 2.0).any(1)] - return filter_sizes - - def _anchor_fitness(self, box_size: Tensor, anchors: Tensor, thr: float): - """mutation fitness.""" - _, best = self._metric(box_size, anchors) - return (best * (best > thr).float()).mean() - - def _metric(self, box_size: Tensor, anchors: Tensor) -> Tuple: - """compute boxes metric with anchors. - - Args: - box_size (Tensor): The size of the bxes, which shape is - (box_num, 2), the number 2 means width and height. - anchors (Tensor): The size of the bxes, which shape is - (anchor_num, 2), the number 2 means width and height. - - Returns: - Tuple: a tuple of metric result, min_ratio and best_ratio - """ - - # ratio means the (width_1/width_2 and height_1/height_2) ratio of each - # box and anchor, the ratio shape is torch.Size([box_num,anchor_num,2]) - ratio = box_size[:, None] / anchors[None] - - # min_ratio records the min ratio of each box with all anchor, - # min_ratio.shape is torch.Size([box_num,anchor_num]) - # notice: - # smaller ratio means worse shape-match between boxes and anchors - min_ratio = torch.min(ratio, 1 / ratio).min(2)[0] - - # find the best shape-match ratio for each box - # box_best_ratio.shape is torch.Size([box_num]) - best_ratio = min_ratio.max(1)[0] - - return min_ratio, best_ratio - - -class YOLODEAnchorOptimizer(BaseAnchorOptimizer): - """YOLO anchor optimizer using differential evolution algorithm. - - Args: - iters (int): Maximum iterations for k-means. - strategy (str): The differential evolution strategy to use. - Should be one of: - - - 'best1bin' - - 'best1exp' - - 'rand1exp' - - 'randtobest1exp' - - 'currenttobest1exp' - - 'best2exp' - - 'rand2exp' - - 'randtobest1bin' - - 'currenttobest1bin' - - 'best2bin' - - 'rand2bin' - - 'rand1bin' - - Default: 'best1bin'. - population_size (int): Total population size of evolution algorithm. - Default: 15. 
- convergence_thr (float): Tolerance for convergence, the - optimizing stops when ``np.std(pop) <= abs(convergence_thr) - + convergence_thr * np.abs(np.mean(population_energies))``, - respectively. Default: 0.0001. - mutation (tuple[float]): Range of dithering randomly changes the - mutation constant. Default: (0.5, 1). - recombination (float): Recombination constant of crossover probability. - Default: 0.7. - """ - - def __init__(self, - iters, - strategy='best1bin', - population_size=15, - convergence_thr=0.0001, - mutation=(0.5, 1), - recombination=0.7, - **kwargs): - - super().__init__(**kwargs) - - self.iters = iters - self.strategy = strategy - self.population_size = population_size - self.convergence_thr = convergence_thr - self.mutation = mutation - self.recombination = recombination - - def optimize(self): - anchors = self.differential_evolution() - self.save_result(anchors, self.out_dir) - - def differential_evolution(self): - bboxes = self.get_zero_center_bbox_tensor() - - bounds = [] - for i in range(self.num_anchors): - bounds.extend([(0, self.input_shape[0]), (0, self.input_shape[1])]) - - result = differential_evolution( - func=self.avg_iou_cost, - bounds=bounds, - args=(bboxes, ), - strategy=self.strategy, - maxiter=self.iters, - popsize=self.population_size, - tol=self.convergence_thr, - mutation=self.mutation, - recombination=self.recombination, - updating='immediate', - disp=True) - self.logger.info( - f'Anchor evolution finish. 
Average IOU: {1 - result.fun}') - anchors = [(w, h) for w, h in zip(result.x[::2], result.x[1::2])] - anchors = sorted(anchors, key=lambda x: x[0] * x[1]) - return anchors - - @staticmethod - def avg_iou_cost(anchor_params, bboxes): - assert len(anchor_params) % 2 == 0 - anchor_whs = torch.tensor( - [[w, h] - for w, h in zip(anchor_params[::2], anchor_params[1::2])]).to( - bboxes.device, dtype=bboxes.dtype) - anchor_boxes = bbox_cxcywh_to_xyxy( - torch.cat([torch.zeros_like(anchor_whs), anchor_whs], dim=1)) - ious = bbox_overlaps(bboxes, anchor_boxes) - max_ious, _ = ious.max(1) - cost = 1 - max_ious.mean().item() - return cost - - def main(): logger = MMLogger.get_current_instance() args = parse_args() diff --git a/tools/test.py b/tools/test.py index c05defe3c..f8076d7c0 100644 --- a/tools/test.py +++ b/tools/test.py @@ -35,6 +35,12 @@ def parse_args(): '--tta', action='store_true', help='Whether to use test time augmentation') + parser.add_argument( + '--autoanchor', + choices=[ + 'k_means_autoanchor', 'de_autoanchor', 'v5_k_means_autoanchor' + ], + help='types of autoanchor') parser.add_argument( '--show', action='store_true', help='show prediction results') parser.add_argument( @@ -98,6 +104,11 @@ def main(): if args.deploy: cfg.custom_hooks.append(dict(type='SwitchToDeployHook')) + if args.autoanchor: + assert cfg.model.bbox_head.prior_generator.type \ + == 'mmdet.YOLOAnchorGenerator' + cfg.custom_hooks.append(cfg.autoanchor_hook) + # add `format_only` and `outfile_prefix` into cfg if args.json_prefix is not None: cfg_json = { @@ -126,6 +137,16 @@ def main(): test_data_cfg.batch_shapes_cfg = None test_data_cfg.pipeline = cfg.tta_pipeline + if args.autoanchor: + assert cfg.model.bbox_head.prior_generator.type \ + == 'mmdet.YOLOAnchorGenerator' + assert args.autoanchor in [ + 'k_means_autoanchor', 'de_autoanchor', + 'v5_k_means_autoanchor'], \ + 'only k_means_autoanchor, de_autoanchor, v5_k_means_autoanchor ' \ + 'are supported !' 
+ cfg.custom_hooks.append(cfg.get(args.autoanchor)) + # build the runner from config if 'runner_type' not in cfg: # build the default runner diff --git a/tools/train.py b/tools/train.py index 1060b631a..25680344e 100644 --- a/tools/train.py +++ b/tools/train.py @@ -29,6 +29,8 @@ def parse_args(): help='If specify checkpoint path, resume from it, while if not ' 'specify, try to auto resume from the latest checkpoint ' 'in the work directory.') + parser.add_argument( + '--autoanchor', action='store_true', help='types of autoanchor') parser.add_argument( '--cfg-options', nargs='+', @@ -87,6 +89,11 @@ def main(): cfg.optim_wrapper.type = 'AmpOptimWrapper' cfg.optim_wrapper.loss_scale = 'dynamic' + if args.autoanchor: + assert cfg.model.bbox_head.prior_generator.type \ + == 'mmdet.YOLOAnchorGenerator' + cfg.custom_hooks.append(cfg.autoanchor_hook) + # resume is determined in this priority: resume from > auto_resume if args.resume == 'auto': cfg.resume = True