# Copyright (c) OpenMMLab. All rights reserved.
import functools
import warnings
from collections import abc
from inspect import getfullargspec

import numpy as np
import torch
import torch.nn as nn

from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version
from .dist_utils import allreduce_grads as _allreduce_grads

try:
    # If PyTorch version >= 1.6.0, torch.cuda.amp.autocast would be imported
    # and used; otherwise, auto fp16 will adopt mmcv's implementation.
    # Note that when PyTorch >= 1.6.0, we still cast tensor types to fp16
    # manually, so the behavior may not be consistent with real amp.
    from torch.cuda.amp import autocast
except ImportError:
    pass


def cast_tensor_type(inputs, src_type, dst_type):
    """Recursively convert Tensor in inputs from src_type to dst_type.

    Args:
        inputs: Inputs that need to be cast.
        src_type (torch.dtype): Source type.
        dst_type (torch.dtype): Destination type.

    Returns:
        The same type as inputs, but all contained Tensors have been cast.
    """
    if isinstance(inputs, nn.Module):
        return inputs
    elif isinstance(inputs, torch.Tensor):
        return inputs.to(dst_type)
    elif isinstance(inputs, str):
        return inputs
    elif isinstance(inputs, np.ndarray):
        return inputs
    elif isinstance(inputs, abc.Mapping):
        return type(inputs)({
            k: cast_tensor_type(v, src_type, dst_type)
            for k, v in inputs.items()
        })
    elif isinstance(inputs, abc.Iterable):
        return type(inputs)(
            cast_tensor_type(item, src_type, dst_type) for item in inputs)
    else:
        return inputs
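

# Illustrative sketch (not part of the original mmcv API surface):
# ``cast_tensor_type`` walks nested containers and casts only the tensors it
# finds, while strings, ndarrays and modules pass through unchanged.
# >>> batch = {'img': torch.ones(2, 3), 'meta': 'frame_0001'}
# >>> out = cast_tensor_type(batch, torch.float, torch.half)
# >>> out['img'].dtype    # torch.float16
# >>> out['meta']         # still the plain string 'frame_0001'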


def auto_fp16(apply_to=None, out_fp32=False):
    """Decorator to enable fp16 training automatically.

    This decorator is useful when you write custom modules and want to support
    mixed precision training. If input arguments are fp32 tensors, they will
    be converted to fp16 automatically. Arguments other than fp32 tensors are
    ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the
    backend; otherwise, the original mmcv implementation will be adopted.

    Args:
        apply_to (Iterable, optional): The argument names to be converted.
            `None` indicates all arguments.
        out_fp32 (bool): Whether to convert the output back to fp32.

    Example:

        >>> import torch.nn as nn
        >>> class MyModule1(nn.Module):
        >>>
        >>>     # Convert x and y to fp16
        >>>     @auto_fp16()
        >>>     def forward(self, x, y):
        >>>         pass

        >>> import torch.nn as nn
        >>> class MyModule2(nn.Module):
        >>>
        >>>     # convert pred to fp16
        >>>     @auto_fp16(apply_to=('pred', ))
        >>>     def do_something(self, pred, others):
        >>>         pass
    """

    def auto_fp16_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            # check if the module has set the attribute `fp16_enabled`, if
            # not, just fall back to the original method.
            if not isinstance(args[0], torch.nn.Module):
                raise TypeError('@auto_fp16 can only be used to decorate the '
                                'method of nn.Module')
            if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
                return old_func(*args, **kwargs)

            # get the arg spec of the decorated method
            args_info = getfullargspec(old_func)
            # get the argument names to be cast
            args_to_cast = args_info.args if apply_to is None else apply_to
            # convert the args that need to be processed
            new_args = []
            # NOTE: default args are not taken into consideration
            if args:
                arg_names = args_info.args[:len(args)]
                for i, arg_name in enumerate(arg_names):
                    if arg_name in args_to_cast:
                        new_args.append(
                            cast_tensor_type(args[i], torch.float,
                                             torch.half))
                    else:
                        new_args.append(args[i])
            # convert the kwargs that need to be processed
            new_kwargs = {}
            if kwargs:
                for arg_name, arg_value in kwargs.items():
                    if arg_name in args_to_cast:
                        new_kwargs[arg_name] = cast_tensor_type(
                            arg_value, torch.float, torch.half)
                    else:
                        new_kwargs[arg_name] = arg_value
            # apply converted arguments to the decorated method
            if (TORCH_VERSION != 'parrots' and
                    digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
                with autocast(enabled=True):
                    output = old_func(*new_args, **new_kwargs)
            else:
                output = old_func(*new_args, **new_kwargs)
            # cast the results back to fp32 if necessary
            if out_fp32:
                output = cast_tensor_type(output, torch.half, torch.float)
            return output

        return new_func

    return auto_fp16_wrapper
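

# Minimal end-to-end sketch (``MyNet`` is a hypothetical module, not part of
# this file): ``auto_fp16`` only casts when the module carries a truthy
# ``fp16_enabled`` attribute, which is normally switched on by
# ``wrap_fp16_model`` defined below.
# >>> class MyNet(nn.Module):
# ...     def __init__(self):
# ...         super().__init__()
# ...         self.fp16_enabled = False
# ...         self.fc = nn.Linear(4, 2)
# ...     @auto_fp16()
# ...     def forward(self, x):
# ...         return self.fc(x)
# >>> net = MyNet()
# >>> wrap_fp16_model(net)  # sets net.fp16_enabled = True
# >>> # On PyTorch >= 1.6, forward now runs under torch.cuda.amp.autocast;
# >>> # on older versions the fp32 input is cast to fp16 by the decorator.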


def force_fp32(apply_to=None, out_fp16=False):
    """Decorator to convert input arguments to fp32 in force.

    This decorator is useful when you write custom modules and want to support
    mixed precision training. If there are some inputs that must be processed
    in fp32 mode, then this decorator can handle it. If input arguments are
    fp16 tensors, they will be converted to fp32 automatically. Arguments
    other than fp16 tensors are ignored. If you are using PyTorch >= 1.6,
    torch.cuda.amp is used as the backend; otherwise, the original mmcv
    implementation will be adopted.

    Args:
        apply_to (Iterable, optional): The argument names to be converted.
            `None` indicates all arguments.
        out_fp16 (bool): Whether to convert the output back to fp16.

    Example:

        >>> import torch.nn as nn
        >>> class MyModule1(nn.Module):
        >>>
        >>>     # Convert x and y to fp32
        >>>     @force_fp32()
        >>>     def loss(self, x, y):
        >>>         pass

        >>> import torch.nn as nn
        >>> class MyModule2(nn.Module):
        >>>
        >>>     # convert pred to fp32
        >>>     @force_fp32(apply_to=('pred', ))
        >>>     def post_process(self, pred, others):
        >>>         pass
    """

    def force_fp32_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            # check if the module has set the attribute `fp16_enabled`, if
            # not, just fall back to the original method.
            if not isinstance(args[0], torch.nn.Module):
                raise TypeError('@force_fp32 can only be used to decorate the '
                                'method of nn.Module')
            if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
                return old_func(*args, **kwargs)

            # get the arg spec of the decorated method
            args_info = getfullargspec(old_func)
            # get the argument names to be cast
            args_to_cast = args_info.args if apply_to is None else apply_to
            # convert the args that need to be processed
            new_args = []
            if args:
                arg_names = args_info.args[:len(args)]
                for i, arg_name in enumerate(arg_names):
                    if arg_name in args_to_cast:
                        new_args.append(
                            cast_tensor_type(args[i], torch.half,
                                             torch.float))
                    else:
                        new_args.append(args[i])
            # convert the kwargs that need to be processed
            new_kwargs = dict()
            if kwargs:
                for arg_name, arg_value in kwargs.items():
                    if arg_name in args_to_cast:
                        new_kwargs[arg_name] = cast_tensor_type(
                            arg_value, torch.half, torch.float)
                    else:
                        new_kwargs[arg_name] = arg_value
            # apply converted arguments to the decorated method
            if (TORCH_VERSION != 'parrots' and
                    digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
                with autocast(enabled=False):
                    output = old_func(*new_args, **new_kwargs)
            else:
                output = old_func(*new_args, **new_kwargs)
            # cast the results back to fp16 if necessary
            if out_fp16:
                output = cast_tensor_type(output, torch.float, torch.half)
            return output

        return new_func

    return force_fp32_wrapper
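

# Sketch of the usual pairing (``Head`` and its methods are illustrative):
# the forward pass runs in fp16 via ``auto_fp16`` while a numerically
# sensitive method such as the loss is forced back to fp32 by ``force_fp32``.
# >>> class Head(nn.Module):
# ...     def __init__(self):
# ...         super().__init__()
# ...         self.fp16_enabled = False
# ...     @auto_fp16()
# ...     def forward(self, feats):
# ...         return feats * 2
# ...     @force_fp32(apply_to=('pred', ))
# ...     def loss(self, pred, target):
# ...         # pred is cast back to fp32 by the decorator
# ...         return torch.nn.functional.mse_loss(pred, target)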


def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    warnings.warn(
        '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be '
        'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads".')
    _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb)


def wrap_fp16_model(model):
    """Wrap the FP32 model to FP16.

    If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend;
    otherwise, the original mmcv implementation will be adopted.

    For PyTorch >= 1.6, this function will
    1. Set the fp16 flag inside the model to True.

    Otherwise:
    1. Convert the FP32 model to FP16.
    2. Keep some necessary layers in FP32, e.g., normalization layers.
    3. Set the `fp16_enabled` flag inside the model to True.

    Args:
        model (nn.Module): Model in FP32.
    """
    if (TORCH_VERSION == 'parrots'
            or digit_version(TORCH_VERSION) < digit_version('1.6.0')):
        # convert model to fp16
        model.half()
        # patch the normalization layers to make them work in fp32 mode
        patch_norm_fp32(model)
    # set the `fp16_enabled` flag
    for m in model.modules():
        if hasattr(m, 'fp16_enabled'):
            m.fp16_enabled = True
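

# Hedged usage sketch (``net`` is the hypothetical module from the sketch
# above): after wrapping, the decorated methods start casting because the
# ``fp16_enabled`` flags are set.
# >>> net = MyNet()
# >>> wrap_fp16_model(net)
# >>> # PyTorch >= 1.6: weights stay fp32 and decorated methods rely on
# >>> # torch.cuda.amp.autocast.
# >>> # PyTorch < 1.6: weights become fp16 via model.half(), except the
# >>> # normalization layers, which patch_norm_fp32 keeps in fp32.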


def patch_norm_fp32(module):
    """Recursively convert normalization layers from FP16 to FP32.

    Args:
        module (nn.Module): The FP16 module whose normalization layers are to
            be converted to FP32.

    Returns:
        nn.Module: The converted module, the normalization layers have been
            converted to FP32.
    """
    if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)):
        module.float()
        if isinstance(module, nn.GroupNorm) or \
                (TORCH_VERSION != 'parrots'
                 and digit_version(TORCH_VERSION) < digit_version('1.3')):
            module.forward = patch_forward_method(module.forward, torch.half,
                                                  torch.float)
    for child in module.children():
        patch_norm_fp32(child)
    return module


def patch_forward_method(func, src_type, dst_type, convert_output=True):
    """Patch the forward method of a module.

    Args:
        func (callable): The original forward method.
        src_type (torch.dtype): Type of input arguments to be converted from.
        dst_type (torch.dtype): Type of input arguments to be converted to.
        convert_output (bool): Whether to convert the output back to src_type.

    Returns:
        callable: The patched forward method.
    """

    def new_forward(*args, **kwargs):
        output = func(*cast_tensor_type(args, src_type, dst_type),
                      **cast_tensor_type(kwargs, src_type, dst_type))
        if convert_output:
            output = cast_tensor_type(output, dst_type, src_type)
        return output

    return new_forward


class LossScaler:
    """Class that manages loss scaling in mixed precision training which
    supports both dynamic and static mode.

    The implementation refers to
    https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py.
    Dynamic loss scaling is selected by supplying ``mode='dynamic'``.
    It's important to understand how :class:`LossScaler` operates.
    Loss scaling is designed to combat the problem of underflowing
    gradients encountered when training fp16 networks for long periods.
    Dynamic loss scaling begins by attempting a very high loss
    scale. Ironically, this may result in OVERflowing gradients.
    If overflowing gradients are encountered, the optimizer then
    skips the update step for this particular iteration/minibatch,
    and :class:`LossScaler` adjusts the loss scale to a lower value.
    If a certain number of iterations occur without overflowing gradients
    detected, :class:`LossScaler` increases the loss scale once more.
    In this way :class:`LossScaler` attempts to "ride the edge" of always
    using the highest loss scale possible without incurring overflow.

    Args:
        init_scale (float): Initial loss scale value, default: 2**32.
        scale_factor (float): Factor used when adjusting the loss scale.
            Default: 2.
        mode (str): Loss scaling mode. 'dynamic' or 'static'.
        scale_window (int): Number of consecutive iterations without an
            overflow to wait before increasing the loss scale. Default: 1000.
    """

    def __init__(self,
                 init_scale=2**32,
                 mode='dynamic',
                 scale_factor=2.,
                 scale_window=1000):
        self.cur_scale = init_scale
        self.cur_iter = 0
        assert mode in ('dynamic',
                        'static'), 'mode can only be dynamic or static'
        self.mode = mode
        self.last_overflow_iter = -1
        self.scale_factor = scale_factor
        self.scale_window = scale_window

    def has_overflow(self, params):
        """Check if params contain overflow."""
        if self.mode != 'dynamic':
            return False
        for p in params:
            if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data):
                return True
        return False

    @staticmethod
    def _has_inf_or_nan(x):
        """Check if the tensor ``x`` contains Inf or NaN."""
        try:
            cpu_sum = float(x.float().sum())
        except RuntimeError as instance:
            if 'value cannot be converted' not in instance.args[0]:
                raise
            return True
        else:
            if cpu_sum == float('inf') or cpu_sum == -float('inf') \
                    or cpu_sum != cpu_sum:
                return True
            return False

    def update_scale(self, overflow):
        """Update the loss scale according to whether an overflow occurred."""
        if self.mode != 'dynamic':
            return
        if overflow:
            self.cur_scale = max(self.cur_scale / self.scale_factor, 1)
            self.last_overflow_iter = self.cur_iter
        else:
            if (self.cur_iter - self.last_overflow_iter) % \
                    self.scale_window == 0:
                self.cur_scale *= self.scale_factor
        self.cur_iter += 1

    def state_dict(self):
        """Returns the state of the scaler as a :class:`dict`."""
        return dict(
            cur_scale=self.cur_scale,
            cur_iter=self.cur_iter,
            mode=self.mode,
            last_overflow_iter=self.last_overflow_iter,
            scale_factor=self.scale_factor,
            scale_window=self.scale_window)

    def load_state_dict(self, state_dict):
        """Loads the loss_scaler state dict.

        Args:
            state_dict (dict): scaler state.
        """
        self.cur_scale = state_dict['cur_scale']
        self.cur_iter = state_dict['cur_iter']
        self.mode = state_dict['mode']
        self.last_overflow_iter = state_dict['last_overflow_iter']
        self.scale_factor = state_dict['scale_factor']
        self.scale_window = state_dict['scale_window']

    @property
    def loss_scale(self):
        """float: Current loss scale value."""
        return self.cur_scale
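

# Hedged sketch of one manual dynamic-loss-scaling step (``model``,
# ``optimizer`` and ``loss`` are illustrative placeholders; in mmcv this loop
# is normally driven by the Fp16OptimizerHook rather than written by hand):
# >>> scaler = LossScaler(init_scale=2**16, mode='dynamic')
# >>> (loss * scaler.loss_scale).backward()
# >>> overflow = scaler.has_overflow(model.parameters())
# >>> if not overflow:
# ...     for p in model.parameters():   # un-scale gradients before stepping
# ...         if p.grad is not None:
# ...             p.grad.div_(scaler.loss_scale)
# ...     optimizer.step()
# >>> scaler.update_scale(overflow)  # shrink on overflow, grow after
# >>> optimizer.zero_grad()          # scale_window clean iterations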