YOLOW

Sleeping

YOLOW / third_party /mmyolo /tools /analysis_tools /optimize_anchors.py

stevengrove

initial commit

186701e 9 months ago

24.3 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	"""Optimize anchor settings on a specific dataset.

	This script provides three methods to optimize YOLO anchors including k-means
	anchor cluster, differential evolution and v5-k-means. You can use
	``--algorithm k-means``, ``--algorithm differential_evolution`` and
	``--algorithm v5-k-means`` to switch those methods.

	Example:
	Use k-means anchor cluster::

	python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
	--algorithm k-means --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \
	--out-dir ${OUT_DIR}

	Use differential evolution to optimize anchors::

	python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
	--algorithm differential_evolution \
	--input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \
	--out-dir ${OUT_DIR}

	Use v5-k-means to optimize anchors::

	python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
	--algorithm v5-k-means \
	--input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \
	--prior_match_thr ${PRIOR_MATCH_THR} \
	--out-dir ${OUT_DIR}
	"""
	import argparse
	import os.path as osp
	import random
	from typing import Tuple

	import numpy as np
	import torch
	from mmdet.structures.bbox import (bbox_cxcywh_to_xyxy, bbox_overlaps,
	bbox_xyxy_to_cxcywh)
	from mmdet.utils import replace_cfg_vals, update_data_root
	from mmengine.config import Config
	from mmengine.fileio import dump
	from mmengine.logging import MMLogger
	from mmengine.registry import init_default_scope
	from mmengine.utils import ProgressBar
	from scipy.optimize import differential_evolution
	from torch import Tensor

	from mmyolo.registry import DATASETS

	try:
	from scipy.cluster.vq import kmeans
	except ImportError:
	kmeans = None


	def parse_args():
	parser = argparse.ArgumentParser(description='Optimize anchor parameters.')
	parser.add_argument('config', help='Train config file path.')
	parser.add_argument(
	'--input-shape',
	type=int,
	nargs='+',
	default=[640, 640],
	help='input image size, represent [width, height]')
	parser.add_argument(
	'--algorithm',
	default='DE',
	help='Algorithm used for anchor optimizing.'
	'Support k-means and differential_evolution for YOLO,'
	'and v5-k-means is special for YOLOV5.')
	parser.add_argument(
	'--iters',
	default=1000,
	type=int,
	help='Maximum iterations for optimizer.')
	parser.add_argument(
	'--prior-match-thr',
	default=4.0,
	type=float,
	help='anchor-label `gt_filter_sizes` ratio threshold '
	'hyperparameter used for training, default=4.0, this '
	'parameter is unique to v5-k-means')
	parser.add_argument(
	'--mutation-args',
	type=float,
	nargs='+',
	default=[0.9, 0.1],
	help='paramter of anchor optimize method genetic algorithm, '
	'represent [prob, sigma], this parameter is unique to v5-k-means')
	parser.add_argument(
	'--augment-args',
	type=float,
	nargs='+',
	default=[0.9, 1.1],
	help='scale factor of box size augment when metric box and anchor, '
	'represent [min, max], this parameter is unique to v5-k-means')
	parser.add_argument(
	'--device', default='cuda:0', help='Device used for calculating.')
	parser.add_argument(
	'--out-dir',
	default=None,
	type=str,
	help='Path to save anchor optimize result.')

	args = parser.parse_args()
	return args


	class BaseAnchorOptimizer:
	"""Base class for anchor optimizer.

	Args:
	dataset (obj:`Dataset`): Dataset object.
	input_shape (list[int]): Input image shape of the model.
	Format in [width, height].
	num_anchor_per_level (list[int]) : Number of anchors for each level.
	logger (obj:`logging.Logger`): The logger for logging.
	device (str, optional): Device used for calculating.
	Default: 'cuda:0'
	out_dir (str, optional): Path to save anchor optimize result.
	Default: None
	"""

	def __init__(self,
	dataset,
	input_shape,
	num_anchor_per_level,
	logger,
	device='cuda:0',
	out_dir=None):
	self.dataset = dataset
	self.input_shape = input_shape
	self.num_anchor_per_level = num_anchor_per_level
	self.num_anchors = sum(num_anchor_per_level)
	self.logger = logger
	self.device = device
	self.out_dir = out_dir
	bbox_whs, img_shapes = self.get_whs_and_shapes()
	ratios = img_shapes.max(1, keepdims=True) / np.array([input_shape])

	# resize to input shape
	self.bbox_whs = bbox_whs / ratios

	def get_whs_and_shapes(self):
	"""Get widths and heights of bboxes and shapes of images.

	Returns:
	tuple[np.ndarray]: Array of bbox shapes and array of image
	shapes with shape (num_bboxes, 2) in [width, height] format.
	"""
	self.logger.info('Collecting bboxes from annotation...')
	bbox_whs = []
	img_shapes = []
	prog_bar = ProgressBar(len(self.dataset))
	for idx in range(len(self.dataset)):
	data_info = self.dataset.get_data_info(idx)
	img_shape = np.array([data_info['width'], data_info['height']])
	gt_instances = data_info['instances']
	for instance in gt_instances:
	bbox = np.array(instance['bbox'])
	gt_filter_sizes = bbox[2:4] - bbox[0:2]
	img_shapes.append(img_shape)
	bbox_whs.append(gt_filter_sizes)

	prog_bar.update()
	print('\n')
	bbox_whs = np.array(bbox_whs)
	img_shapes = np.array(img_shapes)
	self.logger.info(f'Collected {bbox_whs.shape[0]} bboxes.')
	return bbox_whs, img_shapes

	def get_zero_center_bbox_tensor(self):
	"""Get a tensor of bboxes centered at (0, 0).

	Returns:
	Tensor: Tensor of bboxes with shape (num_bboxes, 4)
	in [xmin, ymin, xmax, ymax] format.
	"""
	whs = torch.from_numpy(self.bbox_whs).to(
	self.device, dtype=torch.float32)
	bboxes = bbox_cxcywh_to_xyxy(
	torch.cat([torch.zeros_like(whs), whs], dim=1))
	return bboxes

	def optimize(self):
	raise NotImplementedError

	def save_result(self, anchors, path=None):

	anchor_results = []
	start = 0
	for num in self.num_anchor_per_level:
	end = num + start
	anchor_results.append([(round(w), round(h))
	for w, h in anchors[start:end]])
	start = end

	self.logger.info(f'Anchor optimize result:{anchor_results}')
	if path:
	json_path = osp.join(path, 'anchor_optimize_result.json')
	dump(anchor_results, json_path)
	self.logger.info(f'Result saved in {json_path}')


	class YOLOKMeansAnchorOptimizer(BaseAnchorOptimizer):
	r"""YOLO anchor optimizer using k-means. Code refer to `AlexeyAB/darknet.
	<https://github.com/AlexeyAB/darknet/blob/master/src/detector.c>`_.

	Args:
	iters (int): Maximum iterations for k-means.
	"""

	def __init__(self, iters, **kwargs):

	super().__init__(**kwargs)
	self.iters = iters

	def optimize(self):
	anchors = self.kmeans_anchors()
	self.save_result(anchors, self.out_dir)

	def kmeans_anchors(self):
	self.logger.info(
	f'Start cluster {self.num_anchors} YOLO anchors with K-means...')
	bboxes = self.get_zero_center_bbox_tensor()
	cluster_center_idx = torch.randint(
	0, bboxes.shape[0], (self.num_anchors, )).to(self.device)

	assignments = torch.zeros((bboxes.shape[0], )).to(self.device)
	cluster_centers = bboxes[cluster_center_idx]
	if self.num_anchors == 1:
	cluster_centers = self.kmeans_maximization(bboxes, assignments,
	cluster_centers)
	anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()
	anchors = sorted(anchors, key=lambda x: x[0] * x[1])
	return anchors

	prog_bar = ProgressBar(self.iters)
	for i in range(self.iters):
	converged, assignments = self.kmeans_expectation(
	bboxes, assignments, cluster_centers)
	if converged:
	self.logger.info(f'K-means process has converged at iter {i}.')
	break
	cluster_centers = self.kmeans_maximization(bboxes, assignments,
	cluster_centers)
	prog_bar.update()
	print('\n')
	avg_iou = bbox_overlaps(bboxes,
	cluster_centers).max(1)[0].mean().item()

	anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()
	anchors = sorted(anchors, key=lambda x: x[0] * x[1])
	self.logger.info(f'Anchor cluster finish. Average IOU: {avg_iou}')

	return anchors

	def kmeans_maximization(self, bboxes, assignments, centers):
	"""Maximization part of EM algorithm(Expectation-Maximization)"""
	new_centers = torch.zeros_like(centers)
	for i in range(centers.shape[0]):
	mask = (assignments == i)
	if mask.sum():
	new_centers[i, :] = bboxes[mask].mean(0)
	return new_centers

	def kmeans_expectation(self, bboxes, assignments, centers):
	"""Expectation part of EM algorithm(Expectation-Maximization)"""
	ious = bbox_overlaps(bboxes, centers)
	closest = ious.argmax(1)
	converged = (closest == assignments).all()
	return converged, closest


	class YOLOV5KMeansAnchorOptimizer(BaseAnchorOptimizer):
	r"""YOLOv5 anchor optimizer using shape k-means.
	Code refer to `ultralytics/yolov5.
	<https://github.com/ultralytics/yolov5/blob/master/utils/autoanchor.py>`_.

	Args:
	iters (int): Maximum iterations for k-means.
	prior_match_thr (float): anchor-label width height
	ratio threshold hyperparameter.
	"""

	def __init__(self,
	iters,
	prior_match_thr=4.0,
	mutation_args=[0.9, 0.1],
	augment_args=[0.9, 1.1],
	**kwargs):

	super().__init__(**kwargs)
	self.iters = iters
	self.prior_match_thr = prior_match_thr
	[self.mutation_prob, self.mutation_sigma] = mutation_args
	[self.augment_min, self.augment_max] = augment_args

	def optimize(self):
	self.logger.info(
	f'Start cluster {self.num_anchors} YOLOv5 anchors with K-means...')

	bbox_whs = torch.from_numpy(self.bbox_whs).to(
	self.device, dtype=torch.float32)
	anchors = self.anchor_generate(
	bbox_whs,
	num=self.num_anchors,
	img_size=self.input_shape[0],
	prior_match_thr=self.prior_match_thr,
	iters=self.iters)
	best_ratio, mean_matched = self.anchor_metric(bbox_whs, anchors)
	self.logger.info(f'{mean_matched:.2f} anchors/target {best_ratio:.3f} '
	'Best Possible Recall (BPR). ')
	self.save_result(anchors.tolist(), self.out_dir)

	def anchor_generate(self,
	box_size: Tensor,
	num: int = 9,
	img_size: int = 640,
	prior_match_thr: float = 4.0,
	iters: int = 1000) -> Tensor:
	"""cluster boxes metric with anchors.

	Args:
	box_size (Tensor): The size of the bxes, which shape is
	(box_num, 2),the number 2 means width and height.
	num (int): number of anchors.
	img_size (int): image size used for training
	prior_match_thr (float): width/height ratio threshold
	used for training
	iters (int): iterations to evolve anchors using genetic algorithm

	Returns:
	anchors (Tensor): kmeans evolved anchors
	"""

	thr = 1 / prior_match_thr

	# step1: filter small bbox
	box_size = self._filter_box(box_size)
	assert num <= len(box_size)

	# step2: init anchors
	if kmeans:
	try:
	self.logger.info(
	'beginning init anchors with scipy kmeans method')
	# sigmas for whitening
	sigmas = box_size.std(0).cpu().numpy()
	anchors = kmeans(
	box_size.cpu().numpy() / sigmas, num, iter=30)[0] * sigmas
	# kmeans may return fewer points than requested
	# if width/height is insufficient or too similar
	assert num == len(anchors)
	except Exception:
	self.logger.warning(
	'scipy kmeans method cannot get enough points '
	'because of width/height is insufficient or too similar, '
	'now switching strategies from kmeans to random init.')
	anchors = np.sort(np.random.rand(num * 2)).reshape(
	num, 2) * img_size
	else:
	self.logger.info(
	'cannot found scipy package, switching strategies from kmeans '
	'to random init, you can install scipy package to '
	'get better anchor init')
	anchors = np.sort(np.random.rand(num * 2)).reshape(num,
	2) * img_size

	self.logger.info('init done, beginning evolve anchors...')
	# sort small to large
	anchors = torch.tensor(anchors[np.argsort(anchors.prod(1))]).to(
	box_size.device, dtype=torch.float32)

	# step3: evolve anchors use Genetic Algorithm
	prog_bar = ProgressBar(iters)
	fitness = self._anchor_fitness(box_size, anchors, thr)
	cluster_shape = anchors.shape

	for _ in range(iters):
	mutate_result = np.ones(cluster_shape)
	# mutate until a change occurs (prevent duplicates)
	while (mutate_result == 1).all():
	# mutate_result is scale factor of anchors, between 0.3 and 3
	mutate_result = (
	(np.random.random(cluster_shape) < self.mutation_prob) *
	random.random() * np.random.randn(cluster_shape)
	self.mutation_sigma + 1).clip(0.3, 3.0)
	mutate_result = torch.from_numpy(mutate_result).to(box_size.device)
	new_anchors = (anchors.clone() * mutate_result).clip(min=2.0)
	new_fitness = self._anchor_fitness(box_size, new_anchors, thr)
	if new_fitness > fitness:
	fitness = new_fitness
	anchors = new_anchors.clone()

	prog_bar.update()
	print('\n')
	# sort small to large
	anchors = anchors[torch.argsort(anchors.prod(1))]
	self.logger.info(f'Anchor cluster finish. fitness = {fitness:.4f}')

	return anchors

	def anchor_metric(self,
	box_size: Tensor,
	anchors: Tensor,
	threshold: float = 4.0) -> Tuple:
	"""compute boxes metric with anchors.

	Args:
	box_size (Tensor): The size of the bxes, which shape
	is (box_num, 2), the number 2 means width and height.
	anchors (Tensor): The size of the bxes, which shape
	is (anchor_num, 2), the number 2 means width and height.
	threshold (float): the compare threshold of ratio

	Returns:
	Tuple: a tuple of metric result, best_ratio_mean and mean_matched
	"""
	# step1: augment scale
	# According to the uniform distribution,the scaling scale between
	# augment_min and augment_max is randomly generated
	scale = np.random.uniform(
	self.augment_min, self.augment_max, size=(box_size.shape[0], 1))
	box_size = torch.tensor(
	np.array(
	[l[:, ] * s for s, l in zip(scale,
	box_size.cpu().numpy())])).to(
	box_size.device,
	dtype=torch.float32)
	# step2: calculate ratio
	min_ratio, best_ratio = self._metric(box_size, anchors)
	mean_matched = (min_ratio > 1 / threshold).float().sum(1).mean()
	best_ratio_mean = (best_ratio > 1 / threshold).float().mean()
	return best_ratio_mean, mean_matched

	def _filter_box(self, box_size: Tensor) -> Tensor:
	small_cnt = (box_size < 3.0).any(1).sum()
	if small_cnt:
	self.logger.warning(
	f'Extremely small objects found: {small_cnt} '
	f'of {len(box_size)} labels are <3 pixels in size')
	# filter > 2 pixels
	filter_sizes = box_size[(box_size >= 2.0).any(1)]
	return filter_sizes

	def _anchor_fitness(self, box_size: Tensor, anchors: Tensor, thr: float):
	"""mutation fitness."""
	_, best = self._metric(box_size, anchors)
	return (best * (best > thr).float()).mean()

	def _metric(self, box_size: Tensor, anchors: Tensor) -> Tuple:
	"""compute boxes metric with anchors.

	Args:
	box_size (Tensor): The size of the bxes, which shape is
	(box_num, 2), the number 2 means width and height.
	anchors (Tensor): The size of the bxes, which shape is
	(anchor_num, 2), the number 2 means width and height.

	Returns:
	Tuple: a tuple of metric result, min_ratio and best_ratio
	"""

	# ratio means the (width_1/width_2 and height_1/height_2) ratio of each
	# box and anchor, the ratio shape is torch.Size([box_num,anchor_num,2])
	ratio = box_size[:, None] / anchors[None]

	# min_ratio records the min ratio of each box with all anchor,
	# min_ratio.shape is torch.Size([box_num,anchor_num])
	# notice:
	# smaller ratio means worse shape-match between boxes and anchors
	min_ratio = torch.min(ratio, 1 / ratio).min(2)[0]

	# find the best shape-match ratio for each box
	# box_best_ratio.shape is torch.Size([box_num])
	best_ratio = min_ratio.max(1)[0]

	return min_ratio, best_ratio


	class YOLODEAnchorOptimizer(BaseAnchorOptimizer):
	"""YOLO anchor optimizer using differential evolution algorithm.

	Args:
	iters (int): Maximum iterations for k-means.
	strategy (str): The differential evolution strategy to use.
	Should be one of:

	- 'best1bin'
	- 'best1exp'
	- 'rand1exp'
	- 'randtobest1exp'
	- 'currenttobest1exp'
	- 'best2exp'
	- 'rand2exp'
	- 'randtobest1bin'
	- 'currenttobest1bin'
	- 'best2bin'
	- 'rand2bin'
	- 'rand1bin'

	Default: 'best1bin'.
	population_size (int): Total population size of evolution algorithm.
	Default: 15.
	convergence_thr (float): Tolerance for convergence, the
	optimizing stops when ``np.std(pop) <= abs(convergence_thr)
	+ convergence_thr * np.abs(np.mean(population_energies))``,
	respectively. Default: 0.0001.
	mutation (tuple[float]): Range of dithering randomly changes the
	mutation constant. Default: (0.5, 1).
	recombination (float): Recombination constant of crossover probability.
	Default: 0.7.
	"""

	def __init__(self,
	iters,
	strategy='best1bin',
	population_size=15,
	convergence_thr=0.0001,
	mutation=(0.5, 1),
	recombination=0.7,
	**kwargs):

	super().__init__(**kwargs)

	self.iters = iters
	self.strategy = strategy
	self.population_size = population_size
	self.convergence_thr = convergence_thr
	self.mutation = mutation
	self.recombination = recombination

	def optimize(self):
	anchors = self.differential_evolution()
	self.save_result(anchors, self.out_dir)

	def differential_evolution(self):
	bboxes = self.get_zero_center_bbox_tensor()

	bounds = []
	for i in range(self.num_anchors):
	bounds.extend([(0, self.input_shape[0]), (0, self.input_shape[1])])

	result = differential_evolution(
	func=self.avg_iou_cost,
	bounds=bounds,
	args=(bboxes, ),
	strategy=self.strategy,
	maxiter=self.iters,
	popsize=self.population_size,
	tol=self.convergence_thr,
	mutation=self.mutation,
	recombination=self.recombination,
	updating='immediate',
	disp=True)
	self.logger.info(
	f'Anchor evolution finish. Average IOU: {1 - result.fun}')
	anchors = [(w, h) for w, h in zip(result.x[::2], result.x[1::2])]
	anchors = sorted(anchors, key=lambda x: x[0] * x[1])
	return anchors

	@staticmethod
	def avg_iou_cost(anchor_params, bboxes):
	assert len(anchor_params) % 2 == 0
	anchor_whs = torch.tensor(
	[[w, h]
	for w, h in zip(anchor_params[::2], anchor_params[1::2])]).to(
	bboxes.device, dtype=bboxes.dtype)
	anchor_boxes = bbox_cxcywh_to_xyxy(
	torch.cat([torch.zeros_like(anchor_whs), anchor_whs], dim=1))
	ious = bbox_overlaps(bboxes, anchor_boxes)
	max_ious, _ = ious.max(1)
	cost = 1 - max_ious.mean().item()
	return cost


	def main():
	logger = MMLogger.get_current_instance()
	args = parse_args()
	cfg = args.config
	cfg = Config.fromfile(cfg)

	# replace the ${key} with the value of cfg.key
	cfg = replace_cfg_vals(cfg)

	# update data root according to MMDET_DATASETS
	update_data_root(cfg)

	init_default_scope(cfg.get('default_scope', 'mmyolo'))

	input_shape = args.input_shape
	assert len(input_shape) == 2

	anchor_type = cfg.model.bbox_head.prior_generator.type
	assert anchor_type == 'mmdet.YOLOAnchorGenerator', \
	f'Only support optimize YOLOAnchor, but get {anchor_type}.'

	base_sizes = cfg.model.bbox_head.prior_generator.base_sizes
	num_anchor_per_level = [len(sizes) for sizes in base_sizes]

	train_data_cfg = cfg.train_dataloader
	while 'dataset' in train_data_cfg:
	train_data_cfg = train_data_cfg['dataset']
	dataset = DATASETS.build(train_data_cfg)

	if args.algorithm == 'k-means':
	optimizer = YOLOKMeansAnchorOptimizer(
	dataset=dataset,
	input_shape=input_shape,
	device=args.device,
	num_anchor_per_level=num_anchor_per_level,
	iters=args.iters,
	logger=logger,
	out_dir=args.out_dir)
	elif args.algorithm == 'DE':
	optimizer = YOLODEAnchorOptimizer(
	dataset=dataset,
	input_shape=input_shape,
	device=args.device,
	num_anchor_per_level=num_anchor_per_level,
	iters=args.iters,
	logger=logger,
	out_dir=args.out_dir)
	elif args.algorithm == 'v5-k-means':
	optimizer = YOLOV5KMeansAnchorOptimizer(
	dataset=dataset,
	input_shape=input_shape,
	device=args.device,
	num_anchor_per_level=num_anchor_per_level,
	iters=args.iters,
	prior_match_thr=args.prior_match_thr,
	mutation_args=args.mutation_args,
	augment_args=args.augment_args,
	logger=logger,
	out_dir=args.out_dir)
	else:
	raise NotImplementedError(
	f'Only support k-means and differential_evolution, '
	f'but get {args.algorithm}')

	optimizer.optimize()


	if __name__ == '__main__':
	main()