#!/usr/bin/env python3
# Copyright (c) Megvii Inc. All rights reserved.

import os
import random

import torch
import torch.distributed as dist
import torch.nn as nn

from .base_exp import BaseExp

__all__ = ["Exp", "check_exp_value"]


class Exp(BaseExp):
    def __init__(self):
        super().__init__()

        # ---------------- model config ---------------- #
        # number of classes the model detects
        self.num_classes = 80
        # factor of model depth
        self.depth = 1.00
        # factor of model width
        self.width = 1.00
        # activation name. For example, if set to "relu", "silu" activations will be replaced with "relu".
        self.act = "silu"

        # ---------------- dataloader config ---------------- #
        # set worker number to 4 for shorter dataloader init time
        # If your training process uses too much memory, reduce this value.
        self.data_num_workers = 4
        self.input_size = (640, 640)  # (height, width)
        # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32].
        # To disable multiscale training, set the value to 0.
        self.multiscale_range = 5
        # You can uncomment this line to specify a multiscale range
        # self.random_size = (14, 26)
        # dir of dataset images; if data_dir is None, this project uses the `datasets` dir
        self.data_dir = None
        # name of annotation file for training
        self.train_ann = "instances_train2017.json"
        # name of annotation file for evaluation
        self.val_ann = "instances_val2017.json"
        # name of annotation file for testing
        self.test_ann = "instances_test2017.json"

        # --------------- transform config ----------------- #
        # prob of applying mosaic aug
        self.mosaic_prob = 1.0
        # prob of applying mixup aug
        self.mixup_prob = 1.0
        # prob of applying hsv aug
        self.hsv_prob = 1.0
        # prob of applying flip aug
        self.flip_prob = 0.5
        # rotation angle range; for example, if set to 2, the true range is (-2, 2)
        self.degrees = 10.0
        # translate range; for example, if set to 0.1, the true range is (-0.1, 0.1)
        self.translate = 0.1
        self.mosaic_scale = (0.1, 2)
        # apply mixup aug or not
        self.enable_mixup = True
        self.mixup_scale = (0.5, 1.5)
        # shear angle range; for example, if set to 2, the true range is (-2, 2)
        self.shear = 2.0

        # -------------- training config --------------------- #
        # epoch number used for warmup
        self.warmup_epochs = 5
        # max training epoch
        self.max_epoch = 300
        # minimum learning rate during warmup
        self.warmup_lr = 0
        self.min_lr_ratio = 0.05
        # learning rate per image; during training, the actual lr is basic_lr_per_img * batch_size
        self.basic_lr_per_img = 0.01 / 64.0
        # name of LRScheduler
        self.scheduler = "yoloxwarmcos"
        # number of final epochs during which augmentations like mosaic are disabled
        self.no_aug_epochs = 15
        # apply EMA during training
        self.ema = True

        # weight decay of optimizer
        self.weight_decay = 5e-4
        # momentum of optimizer
        self.momentum = 0.9
        # logging period in iterations; for example,
        # if set to 1, logs are printed every iteration.
        self.print_interval = 10
        # evaluation period in epochs; for example,
        # if set to 1, the model is evaluated after every epoch.
        self.eval_interval = 10
        # save history checkpoints or not.
        # If set to False, yolox will only save the latest and best ckpt.
        self.save_history_ckpt = True
        # name of experiment
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

        # ----------------- testing config ------------------ #
        # output image size during evaluation/test
        self.test_size = (640, 640)
        # confidence threshold during evaluation/test,
        # boxes whose scores are less than test_conf will be filtered out
        self.test_conf = 0.01
        # nms threshold
        self.nmsthre = 0.65

    def get_model(self):
        from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead

        def init_yolo(M):
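            # Override BatchNorm2d defaults on every BN layer (eps=1e-3, momentum=0.03).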
            for m in M.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eps = 1e-3
                    m.momentum = 0.03

        if getattr(self, "model", None) is None:
            in_channels = [256, 512, 1024]
            backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act)
            head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act)
            self.model = YOLOX(backbone, head)

        self.model.apply(init_yolo)
        self.model.head.initialize_biases(1e-2)
        self.model.train()
        return self.model

    def get_dataset(self, cache: bool = False, cache_type: str = "ram"):
        """
        Get dataset according to cache and cache_type parameters.

        Args:
            cache (bool): Whether to cache imgs to ram or disk.
            cache_type (str, optional): Defaults to "ram".
                "ram" : Caching imgs to ram for fast training.
                "disk": Caching imgs to disk for fast training.
        """
        from yolox.data import COCODataset, TrainTransform

        return COCODataset(
            data_dir=self.data_dir,
            json_file=self.train_ann,
            img_size=self.input_size,
            preproc=TrainTransform(
                max_labels=50,
                flip_prob=self.flip_prob,
                hsv_prob=self.hsv_prob
            ),
            cache=cache,
            cache_type=cache_type,
        )

    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: str = None):
        """
        Get dataloader according to the cache_img parameter.

        Args:
            no_aug (bool, optional): Whether to turn off mosaic data augmentation. Defaults to False.
            cache_img (str, optional): cache_img is equivalent to cache_type. Defaults to None.
                "ram" : Caching imgs to ram for fast training.
                "disk": Caching imgs to disk for fast training.
                None : Do not use cache; in this case the dataset is created here, after launch.
        """
        from yolox.data import (
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
            worker_init_reset_seed,
        )
        from yolox.utils import wait_for_the_master

        # If caching is enabled, self.dataset is created before launch;
        # otherwise it is created here, after launch.
        if self.dataset is None:
            with wait_for_the_master():
                assert cache_img is None, \
                    "cache_img must be None if you didn't create self.dataset before launch"
                self.dataset = self.get_dataset(cache=False, cache_type=cache_img)
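        # Wrap the base dataset with mosaic/mixup augmentation; max_labels is raised to
        # 120 here because a mosaic sample combines boxes from multiple source images.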
        self.dataset = MosaicDetection(
            dataset=self.dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                max_labels=120,
                flip_prob=self.flip_prob,
                hsv_prob=self.hsv_prob),
            degrees=self.degrees,
            translate=self.translate,
            mosaic_scale=self.mosaic_scale,
            mixup_scale=self.mixup_scale,
            shear=self.shear,
            enable_mixup=self.enable_mixup,
            mosaic_prob=self.mosaic_prob,
            mixup_prob=self.mixup_prob,
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)

        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler

        # Make sure each process has a different random seed, especially for the 'fork' method.
        # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
        dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed

        train_loader = DataLoader(self.dataset, **dataloader_kwargs)

        return train_loader

    def random_resize(self, data_loader, epoch, rank, is_distributed):
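        # Rank 0 samples a new input size (a multiple of 32) and broadcasts it to all
        # other ranks, so every process trains at the same resolution.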
        tensor = torch.LongTensor(2).cuda()

        if rank == 0:
            size_factor = self.input_size[1] * 1.0 / self.input_size[0]
            if not hasattr(self, 'random_size'):
                min_size = int(self.input_size[0] / 32) - self.multiscale_range
                max_size = int(self.input_size[0] / 32) + self.multiscale_range
                self.random_size = (min_size, max_size)
            size = random.randint(*self.random_size)
            size = (int(32 * size), 32 * int(size * size_factor))
            tensor[0] = size[0]
            tensor[1] = size[1]

        if is_distributed:
            dist.barrier()
            dist.broadcast(tensor, 0)

        input_size = (tensor[0].item(), tensor[1].item())
        return input_size

    def preprocess(self, inputs, targets, tsize):
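        # Resize the batch to the sampled multiscale size `tsize`, then rescale the
        # matching target entries: targets[..., 1::2] hold x-related values and
        # targets[..., 2::2] hold y-related values (index 0 is left untouched).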
        scale_y = tsize[0] / self.input_size[0]
        scale_x = tsize[1] / self.input_size[1]
        if scale_x != 1 or scale_y != 1:
            inputs = nn.functional.interpolate(
                inputs, size=tsize, mode="bilinear", align_corners=False
            )
            targets[..., 1::2] = targets[..., 1::2] * scale_x
            targets[..., 2::2] = targets[..., 2::2] * scale_y
        return inputs, targets

    def get_optimizer(self, batch_size):
        if "optimizer" not in self.__dict__:
            if self.warmup_epochs > 0:
                lr = self.warmup_lr
            else:
                lr = self.basic_lr_per_img * batch_size

            pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
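            # Split parameters into three groups: BN weights (pg0) and biases (pg2) get
            # no weight decay, while other weights (pg1) do.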
            for k, v in self.model.named_modules():
                if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
                    pg2.append(v.bias)  # biases
                if isinstance(v, nn.BatchNorm2d) or "bn" in k:
                    pg0.append(v.weight)  # no decay
                elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
                    pg1.append(v.weight)  # apply decay

            optimizer = torch.optim.SGD(
                pg0, lr=lr, momentum=self.momentum, nesterov=True
            )
            optimizer.add_param_group(
                {"params": pg1, "weight_decay": self.weight_decay}
            )  # add pg1 with weight_decay
            optimizer.add_param_group({"params": pg2})
            self.optimizer = optimizer

        return self.optimizer

    def get_lr_scheduler(self, lr, iters_per_epoch):
        from yolox.utils import LRScheduler

        scheduler = LRScheduler(
            self.scheduler,
            lr,
            iters_per_epoch,
            self.max_epoch,
            warmup_epochs=self.warmup_epochs,
            warmup_lr_start=self.warmup_lr,
            no_aug_epochs=self.no_aug_epochs,
            min_lr_ratio=self.min_lr_ratio,
        )
        return scheduler

    def get_eval_dataset(self, **kwargs):
        from yolox.data import COCODataset, ValTransform
        testdev = kwargs.get("testdev", False)
        legacy = kwargs.get("legacy", False)

        return COCODataset(
            data_dir=self.data_dir,
            json_file=self.val_ann if not testdev else self.test_ann,
            name="val2017" if not testdev else "test2017",
            img_size=self.test_size,
            preproc=ValTransform(legacy=legacy),
        )

    def get_eval_loader(self, batch_size, is_distributed, **kwargs):
        valdataset = self.get_eval_dataset(**kwargs)

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(
                valdataset, shuffle=False
            )
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

        return val_loader

    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
        from yolox.evaluators import COCOEvaluator

        return COCOEvaluator(
            dataloader=self.get_eval_loader(batch_size, is_distributed,
                                            testdev=testdev, legacy=legacy),
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )

    def get_trainer(self, args):
        from yolox.core import Trainer
        trainer = Trainer(self, args)
        # NOTE: trainer shouldn't be an attribute of exp object
        return trainer

    def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False):
        return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)


def check_exp_value(exp: Exp):
    h, w = exp.input_size
    assert h % 32 == 0 and w % 32 == 0, "input size must be multiples of 32"
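

# A minimal usage sketch, assuming the usual YOLOX experiment workflow: experiments
# are defined by subclassing Exp and overriding a few fields. "MyExp" and the values
# below are hypothetical and only illustrate the pattern; they are not defaults of
# this module.
if __name__ == "__main__":
    class MyExp(Exp):
        def __init__(self):
            super().__init__()
            self.num_classes = 20   # hypothetical: a 20-class custom dataset
            self.depth = 0.33       # hypothetical: smaller depth factor
            self.width = 0.50       # hypothetical: smaller width factor
            self.max_epoch = 100

    exp = MyExp()
    check_exp_value(exp)  # validates that input_size is a multiple of 32
    print(exp.num_classes, exp.depth, exp.width)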