Spaces:
Starting
on
L40S
Starting
on
L40S
File size: 14,116 Bytes
b213d84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 |
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import inspect
import numpy as np
import pprint
from typing import Any, List, Optional, Tuple, Union
from fvcore.transforms.transform import Transform, TransformList
"""
See "Data Augmentation" tutorial for an overview of the system:
https://detectron2.readthedocs.io/tutorials/augmentation.html
"""
# Names exported by ``from <this module> import *``. The list mixes the current
# names with the older alias names that are bound at the bottom of this file.
__all__ = [
    "Augmentation",
    "AugmentationList",
    "AugInput",
    "TransformGen",
    "apply_transform_gens",
    "StandardAugInput",
    "apply_augmentations",
]
def _check_img_dtype(img):
assert isinstance(img, np.ndarray), "[Augmentation] Needs an numpy array, but got a {}!".format(
type(img)
)
assert not isinstance(img.dtype, np.integer) or (
img.dtype == np.uint8
), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format(
img.dtype
)
assert img.ndim in [2, 3], img.ndim
def _get_aug_input_args(aug, aug_input) -> List[Any]:
"""
Get the arguments to be passed to ``aug.get_transform`` from the input ``aug_input``.
"""
if aug.input_args is None:
# Decide what attributes are needed automatically
prms = list(inspect.signature(aug.get_transform).parameters.items())
# The default behavior is: if there is one parameter, then its "image"
# (work automatically for majority of use cases, and also avoid BC breaking),
# Otherwise, use the argument names.
if len(prms) == 1:
names = ("image",)
else:
names = []
for name, prm in prms:
if prm.kind in (
inspect.Parameter.VAR_POSITIONAL,
inspect.Parameter.VAR_KEYWORD,
):
raise TypeError(
f""" \
The default implementation of `{type(aug)}.__call__` does not allow \
`{type(aug)}.get_transform` to use variable-length arguments (*args, **kwargs)! \
If arguments are unknown, reimplement `__call__` instead. \
"""
)
names.append(name)
aug.input_args = tuple(names)
args = []
for f in aug.input_args:
try:
args.append(getattr(aug_input, f))
except AttributeError as e:
raise AttributeError(
f"{type(aug)}.get_transform needs input attribute '{f}', "
f"but it is not an attribute of {type(aug_input)}!"
) from e
return args
class Augmentation:
    """
    Augmentation defines (often random) policies/strategies to generate :class:`Transform`
    from data. It is often used for pre-processing of input data.

    A "policy" that generates a :class:`Transform` may, in the most general case,
    need arbitrary information from input data in order to determine what transforms
    to apply. Therefore, each :class:`Augmentation` instance defines the arguments
    needed by its :meth:`get_transform` method. When called with the positional arguments,
    the :meth:`get_transform` method executes the policy.

    Note that :class:`Augmentation` defines the policies to create a :class:`Transform`,
    but not how to execute the actual transform operations to those data.
    Its :meth:`__call__` method will use :meth:`AugInput.transform` to execute the transform.

    The returned `Transform` object is meant to describe deterministic transformation, which means
    it can be re-applied on associated data, e.g. the geometry of an image and its segmentation
    masks need to be transformed together.
    (If such re-application is not needed, then determinism is not a crucial requirement.)
    """

    input_args: Optional[Tuple[str, ...]] = None
    """
    Stores the attribute names needed by :meth:`get_transform`, e.g. ``("image", "sem_seg")``.
    By default, it is just a tuple of argument names in :meth:`self.get_transform`, which often only
    contain "image". As long as the argument name convention is followed, there is no need for
    users to touch this attribute.
    """

    def _init(self, params=None):
        """
        Copy entries of ``params`` onto ``self`` as attributes.

        Args:
            params (dict or None): mapping of attribute name to value. The key
                ``"self"`` and keys starting with ``"_"`` are skipped. A falsy
                value (None or an empty dict) makes this a no-op.
        """
        if params:
            for k, v in params.items():
                if k != "self" and not k.startswith("_"):
                    setattr(self, k, v)

    def get_transform(self, *args) -> Transform:
        """
        Execute the policy based on input data, and decide what transform to apply to inputs.

        Args:
            args: Any fixed-length positional arguments. By default, the name of the arguments
                should exist in the :class:`AugInput` to be used.

        Returns:
            Transform: Returns the deterministic transform to apply to the input.

        Examples:
        ::
            class MyAug:
                # if a policy needs to know both image and semantic segmentation
                def get_transform(self, image, sem_seg) -> T.Transform:
                    pass
            tfm: Transform = MyAug().get_transform(image, sem_seg)
            new_image = tfm.apply_image(image)

        Notes:
            Users can freely use arbitrary new argument names in custom
            :meth:`get_transform` method, as long as they are available in the
            input data. In detectron2 we use the following convention:

            * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or
              floating point in range [0, 1] or [0, 255].
            * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes
              of N instances. Each is in XYXY format in unit of absolute coordinates.
            * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel.

            We do not specify convention for other types and do not include builtin
            :class:`Augmentation` that uses other types in detectron2.
        """
        raise NotImplementedError

    def __call__(self, aug_input) -> Transform:
        """
        Augment the given `aug_input` **in-place**, and return the transform that's used.

        This method will be called to apply the augmentation. In most augmentation, it
        is enough to use the default implementation, which calls :meth:`get_transform`
        using the inputs. But a subclass can overwrite it to have more complicated logic.

        Args:
            aug_input (AugInput): an object that has attributes needed by this augmentation
                (defined by ``self.get_transform``). Its ``transform`` method will be called
                to in-place transform it.

        Returns:
            Transform: the transform that is applied on the input.
        """
        args = _get_aug_input_args(self, aug_input)
        tfm = self.get_transform(*args)
        assert isinstance(tfm, (Transform, TransformList)), (
            f"{type(self)}.get_transform must return an instance of Transform! "
            f"Got {type(tfm)} instead."
        )
        aug_input.transform(tfm)
        return tfm

    def _rand_range(self, low=1.0, high=None, size=None):
        """
        Uniform float random number between low and high.

        Args:
            low (float): lower bound, or the upper bound when ``high`` is None
                (the range is then ``[0, low)``).
            high (float or None): upper bound.
            size: output shape passed to ``np.random.uniform``; None is
                replaced by ``[]``.
        """
        if high is None:
            low, high = 0, low
        if size is None:
            size = []
        return np.random.uniform(low, high, size)

    def __repr__(self):
        """
        Produce something like:
        "MyAugmentation(field1={self.field1}, field2={self.field2})"

        Falls back to the default ``object`` representation when the constructor
        takes ``*args``/``**kwargs`` or when a constructor argument has no
        attribute of the same name on ``self``.
        """
        try:
            sig = inspect.signature(self.__init__)
            classname = type(self).__name__
            argstr = []
            for name, param in sig.parameters.items():
                # Raised explicitly rather than with ``assert`` statements: asserts are
                # stripped under ``python -O``, which would turn the intended fallback
                # below into an uncaught AttributeError from ``getattr``.
                if param.kind == param.VAR_POSITIONAL or param.kind == param.VAR_KEYWORD:
                    raise AssertionError("The default __repr__ doesn't support *args or **kwargs")
                if not hasattr(self, name):
                    raise AssertionError(
                        "Attribute {} not found! "
                        "Default __repr__ only works if attributes match the constructor.".format(
                            name
                        )
                    )
                attr = getattr(self, name)
                default = param.default
                if default is attr:
                    # Arguments left at their default value are omitted.
                    continue
                attr_str = pprint.pformat(attr)
                if "\n" in attr_str:
                    # don't show it if pformat decides to use >1 lines
                    attr_str = "..."
                argstr.append("{}={}".format(name, attr_str))
            return "{}({})".format(classname, ", ".join(argstr))
        except AssertionError:
            return super().__repr__()

    __str__ = __repr__
class _TransformToAug(Augmentation):
    """
    Augmentation that always yields one fixed, pre-built transform.
    """

    def __init__(self, tfm: Transform):
        """
        Args:
            tfm (Transform): the transform returned by every :meth:`get_transform` call.
        """
        self.tfm = tfm

    def get_transform(self, *args):
        """
        Return the wrapped transform; ``args`` are ignored.
        """
        return self.tfm

    def __repr__(self):
        """
        Use the representation of the wrapped transform.
        """
        return f"{self.tfm!r}"

    __str__ = __repr__
def _transform_to_aug(tfm_or_aug):
    """
    Return an :class:`Augmentation` for the given object.

    An :class:`Augmentation` is returned unchanged; a :class:`Transform` is wrapped
    in :class:`_TransformToAug`. Private, used internally to implement augmentations.

    Raises:
        AssertionError: if the argument is neither a Transform nor an Augmentation.
    """
    assert isinstance(tfm_or_aug, (Transform, Augmentation)), tfm_or_aug
    already_aug = isinstance(tfm_or_aug, Augmentation)
    return tfm_or_aug if already_aug else _TransformToAug(tfm_or_aug)
class AugmentationList(Augmentation):
    """
    Run several augmentations one after another.

    Only ``__call__`` is supported. :meth:`get_transform` is not implemented for this
    class (calling it raises an error): the kth augmentation has to be applied to the
    input before the (k+1)th augmentation can read the input it needs, so the whole
    sequence cannot be decided up front.
    """

    def __init__(self, augs):
        """
        Args:
            augs (list[Augmentation or Transform]): items to apply, in order.
                Each one is passed through ``_transform_to_aug``.
        """
        super().__init__()
        self.augs = list(map(_transform_to_aug, augs))

    def __call__(self, aug_input) -> TransformList:
        """
        Call every stored augmentation on ``aug_input`` in order.

        Returns:
            TransformList: the results of the individual calls, in the same order.
        """
        applied = [aug(aug_input) for aug in self.augs]
        return TransformList(applied)

    def __repr__(self):
        """
        Produce "AugmentationList[...]" with the ``str`` of each member, comma-separated.
        """
        return "AugmentationList[{}]".format(", ".join(map(str, self.augs)))

    __str__ = __repr__
class AugInput:
    """
    Input that can be used with :meth:`Augmentation.__call__`.
    This is a standard implementation for the majority of use cases.

    This class provides the standard attributes **"image", "boxes", "sem_seg"**
    defined in :meth:`__init__` and they may be needed by different augmentations.
    Most augmentation policies do not need attributes beyond these three.

    After applying augmentations to these attributes (using :meth:`AugInput.transform`),
    the returned transforms can then be used to transform other data structures that users have.

    Examples:
    ::
        input = AugInput(image, boxes=boxes)
        tfms = augmentation(input)
        transformed_image = input.image
        transformed_boxes = input.boxes
        transformed_other_data = tfms.apply_other(other_data)

    An extended project that works with new data types may implement augmentation policies
    that need other inputs. An algorithm may need to transform inputs in a way different
    from the standard approach defined in this class. In those rare situations, users can
    implement a class similar to this class, that satisfies the following conditions:

    * The input must provide access to these data in the form of attribute access
      (``getattr``). For example, if an :class:`Augmentation` to be applied needs "image"
      and "sem_seg" arguments, its input must have the attribute "image" and "sem_seg".
    * The input must have a ``transform(tfm: Transform) -> None`` method which
      in-place transforms all its attributes.
    """

    # TODO maybe should support more builtin data types here
    def __init__(
        self,
        image: np.ndarray,
        *,
        boxes: Optional[np.ndarray] = None,
        sem_seg: Optional[np.ndarray] = None,
    ):
        """
        Args:
            image (ndarray): (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or
                floating point in range [0, 1] or [0, 255]. The meaning of C is up
                to users.
            boxes (ndarray or None): Nx4 float32 boxes in XYXY_ABS mode
            sem_seg (ndarray or None): HxW uint8 semantic segmentation mask. Each element
                is an integer label of pixel.
        """
        # Only the image is validated; boxes and sem_seg are stored as given.
        _check_img_dtype(image)
        self.image, self.boxes, self.sem_seg = image, boxes, sem_seg

    def transform(self, tfm: Transform) -> None:
        """
        Apply ``tfm`` to every attribute held by this object and store the results.

        "In-place" here means that reading an attribute such as ``self.image``
        afterwards gives the transformed data. Attributes that are None are skipped.
        """
        self.image = tfm.apply_image(self.image)

        has_boxes = self.boxes is not None
        if has_boxes:
            self.boxes = tfm.apply_box(self.boxes)

        has_sem_seg = self.sem_seg is not None
        if has_sem_seg:
            self.sem_seg = tfm.apply_segmentation(self.sem_seg)

    def apply_augmentations(
        self, augmentations: List[Union[Augmentation, Transform]]
    ) -> TransformList:
        """
        Equivalent of ``AugmentationList(augmentations)(self)``
        """
        pipeline = AugmentationList(augmentations)
        return pipeline(self)
def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs):
    """
    Use ``T.AugmentationList(augmentations)(inputs)`` instead.

    Args:
        augmentations: the augmentations or transforms to apply, in order.
        inputs: either a ``np.ndarray`` image, or an object providing an
            ``apply_augmentations`` method.

    Returns:
        tuple: ``(result, tfms)``. ``result`` is the transformed image when an
        ndarray was passed in, otherwise the ``inputs`` object itself.
        ``tfms`` is whatever ``inputs.apply_augmentations`` returned.
    """
    # A bare ndarray is the common image-only case (also kept for backward
    # compatibility): wrap it so it can be handled like any other input.
    image_only = isinstance(inputs, np.ndarray)
    if image_only:
        inputs = AugInput(inputs)

    tfms = inputs.apply_augmentations(augmentations)
    result = inputs.image if image_only else inputs
    return result, tfms
# Module-level aliases: each name below is bound to the very same object as the
# definition it points to, so both names can be used interchangeably.
apply_transform_gens = apply_augmentations
"""
Alias for backward-compatibility.
"""
TransformGen = Augmentation
"""
Alias for Augmentation, since it is something that generates :class:`Transform`s
"""
StandardAugInput = AugInput
"""
Alias for compatibility. It's not worth the complexity to have two classes.
"""
|