Spaces:
Running
on
L40S
Running
on
L40S
# -*- coding: utf-8 -*- | |
# Copyright (c) Facebook, Inc. and its affiliates. | |
""" | |
See "Data Augmentation" tutorial for an overview of the system: | |
https://detectron2.readthedocs.io/tutorials/augmentation.html | |
""" | |
import numpy as np | |
import torch | |
import torch.nn.functional as F | |
from fvcore.transforms.transform import ( | |
CropTransform, | |
HFlipTransform, | |
NoOpTransform, | |
Transform, | |
TransformList, | |
) | |
from PIL import Image | |
try: | |
import cv2 # noqa | |
except ImportError: | |
# OpenCV is an optional dependency at the moment | |
pass | |
__all__ = [ | |
"ExtentTransform", | |
"ResizeTransform", | |
"RotationTransform", | |
"ColorTransform", | |
"PILColorTransform", | |
] | |
class ExtentTransform(Transform):
    """
    Extracts a subregion from the source image and scales it to the output size.

    The fill color is used to map pixels from the source rect that fall outside
    the source image.

    See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform
    """

    def __init__(self, src_rect, output_size, interp=Image.BILINEAR, fill=0):
        """
        Args:
            src_rect (x0, y0, x1, y1): src coordinates
            output_size (h, w): dst image size
            interp: PIL interpolation methods
            fill: Fill color used when src_rect extends outside image
        """
        super().__init__()
        # The methods below read these arguments back as ``self.src_rect``,
        # ``self.output_size``, ``self.interp`` and ``self.fill``; do not rename
        # the parameters without updating them.
        self._set_attributes(locals())

    def apply_image(self, img, interp=None):
        """
        Crop ``src_rect`` out of ``img`` and rescale it to ``output_size``.

        Args:
            img (ndarray): image array. An array with a trailing singleton
                channel axis (H, W, 1) is passed to PIL as a mode "L" image and
                gets its channel axis restored on return.
            interp: PIL interpolation method overriding the one given at
                construction time; ``None`` means "use ``self.interp``".

        Returns:
            ndarray: the transformed image.
        """
        h, w = self.output_size
        # ``Image.NEAREST`` has the integer value 0 and is therefore falsy, so a
        # plain truthiness test would discard an explicit request for nearest
        # neighbour resampling (which is exactly what ``apply_segmentation``
        # asks for). Only ``None`` means "not specified".
        resample = interp if interp is not None else self.interp
        single_channel = len(img.shape) > 2 and img.shape[2] == 1

        if single_channel:
            pil_image = Image.fromarray(img[:, :, 0], mode="L")
        else:
            pil_image = Image.fromarray(img)
        pil_image = pil_image.transform(
            size=(w, h),  # PIL expects (width, height)
            method=Image.EXTENT,
            data=self.src_rect,
            resample=resample,
            fill=self.fill,
        )
        ret = np.asarray(pil_image)
        if single_channel:
            ret = np.expand_dims(ret, -1)
        return ret

    def apply_coords(self, coords):
        """
        Map points from source-image coordinates to output-image coordinates.

        Args:
            coords (ndarray): array whose columns 0 and 1 hold x and y.

        Returns:
            ndarray: a new float32 array; the input is not modified.
        """
        # Transform image center from source coordinates into output coordinates
        # and then map the new origin to the corner of the output image.
        h, w = self.output_size
        x0, y0, x1, y1 = self.src_rect
        new_coords = coords.astype(np.float32)
        new_coords[:, 0] -= 0.5 * (x0 + x1)
        new_coords[:, 1] -= 0.5 * (y0 + y1)
        new_coords[:, 0] *= w / (x1 - x0)
        new_coords[:, 1] *= h / (y1 - y0)
        new_coords[:, 0] += 0.5 * w
        new_coords[:, 1] += 0.5 * h
        return new_coords

    def apply_segmentation(self, segmentation):
        """
        Apply the transform to a segmentation map using nearest-neighbour
        resampling, so that no new (interpolated) label values are created.
        """
        segmentation = self.apply_image(segmentation, interp=Image.NEAREST)
        return segmentation
class ResizeTransform(Transform):
    """
    Resize the image to a target size.
    """

    def __init__(self, h, w, new_h, new_w, interp=None):
        """
        Args:
            h, w (int): original image size
            new_h, new_w (int): new image size
            interp: PIL interpolation methods, defaults to bilinear.
        """
        # TODO decide on PIL vs opencv
        super().__init__()
        interp = Image.BILINEAR if interp is None else interp
        # Local names matter here: they are what gets passed to _set_attributes
        # and are read back as self.h, self.w, self.new_h, self.new_w, self.interp.
        self._set_attributes(locals())

    def apply_image(self, img, interp=None):
        """
        Resize ``img`` to ``(new_h, new_w)``.

        uint8 arrays go through PIL; every other dtype goes through
        ``torch.nn.functional.interpolate``.

        Args:
            img (ndarray): array whose first two axes are (h, w), with at most
                four axes in total.
            interp: PIL interpolation method; ``None`` falls back to the one
                chosen at construction time.
        """
        assert img.shape[:2] == (self.h, self.w)
        assert len(img.shape) <= 4
        interp_method = self.interp if interp is None else interp

        if img.dtype != np.uint8:
            # PIL only supports uint8
            if any(stride < 0 for stride in img.strides):
                img = np.ascontiguousarray(img)
            tensor = torch.from_numpy(img)
            out_shape = list(tensor.shape)
            # Pad with singleton axes after (h, w) so the array is always 4-D.
            padded_shape = out_shape[:2] + [1] * (4 - len(out_shape)) + out_shape[2:]
            tensor = tensor.view(padded_shape).permute(2, 3, 0, 1)  # hw(c) -> nchw
            pil_to_torch_mode = {
                Image.NEAREST: "nearest",
                Image.BILINEAR: "bilinear",
                Image.BICUBIC: "bicubic",
            }
            mode = pil_to_torch_mode[interp_method]
            align_corners = None if mode == "nearest" else False
            tensor = F.interpolate(
                tensor, (self.new_h, self.new_w), mode=mode, align_corners=align_corners
            )
            out_shape[:2] = (self.new_h, self.new_w)
            return tensor.permute(2, 3, 0, 1).view(out_shape).numpy()  # nchw -> hw(c)

        squeeze_channel = len(img.shape) > 2 and img.shape[2] == 1
        if squeeze_channel:
            pil_image = Image.fromarray(img[:, :, 0], mode="L")
        else:
            pil_image = Image.fromarray(img)
        resized = pil_image.resize((self.new_w, self.new_h), interp_method)
        ret = np.asarray(resized)
        if squeeze_channel:
            ret = np.expand_dims(ret, -1)
        return ret

    def apply_coords(self, coords):
        """
        Scale x (column 0) and y (column 1) by the resize ratios.

        The input array is modified in place and also returned.
        """
        ratio_x = self.new_w * 1.0 / self.w
        ratio_y = self.new_h * 1.0 / self.h
        coords[:, 0] = coords[:, 0] * ratio_x
        coords[:, 1] = coords[:, 1] * ratio_y
        return coords

    def apply_segmentation(self, segmentation):
        """
        Resize a segmentation map with nearest-neighbour interpolation.
        """
        return self.apply_image(segmentation, interp=Image.NEAREST)

    def inverse(self):
        """
        Return the transform that resizes from (new_h, new_w) back to (h, w).
        """
        return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp)
class RotationTransform(Transform):
    """
    This method returns a copy of this image, rotated the given
    number of degrees counter clockwise around its center.
    """

    def __init__(self, h, w, angle, expand=True, center=None, interp=None):
        """
        Args:
            h, w (int): original image size
            angle (float): degrees for rotation
            expand (bool): choose if the image should be resized to fit the whole
                rotated image (default), or simply cropped
            center (tuple (width, height)): coordinates of the rotation center
                if left to None, the center will be fit to the center of each image
                center has no effect if expand=True because it only affects shifting
            interp: cv2 interpolation method, default cv2.INTER_LINEAR
        """
        super().__init__()
        # NOTE: every local below is handed to _set_attributes(locals()) and is
        # read back as an attribute of the same name, so names must stay as-is.
        image_center = np.array((w / 2, h / 2))
        if center is None:
            center = image_center
        if interp is None:
            interp = cv2.INTER_LINEAR
        abs_cos = abs(np.cos(np.deg2rad(angle)))
        abs_sin = abs(np.sin(np.deg2rad(angle)))
        if not expand:
            bound_w, bound_h = w, h
        else:
            # find the new width and height bounds
            bound_w, bound_h = np.rint(
                [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin]
            ).astype(int)
        self._set_attributes(locals())
        self.rm_coords = self.create_rotation_matrix()
        # Needed because of this problem https://github.com/opencv/opencv/issues/11784
        self.rm_image = self.create_rotation_matrix(offset=-0.5)

    def apply_image(self, img, interp=None):
        """
        img should be a numpy array, formatted as Height * Width * Nchannels
        """
        if len(img) == 0 or self.angle % 360 == 0:
            # Nothing to rotate: hand the input back untouched.
            return img
        assert img.shape[:2] == (self.h, self.w)
        if interp is None:
            interp = self.interp
        output_size = (self.bound_w, self.bound_h)
        return cv2.warpAffine(img, self.rm_image, output_size, flags=interp)

    def apply_coords(self, coords):
        """
        coords should be a N * 2 array-like, containing N couples of (x, y) points
        """
        coords = np.asarray(coords, dtype=float)
        if len(coords) == 0 or self.angle % 360 == 0:
            return coords
        # cv2.transform is fed an (N, 1, 2) array; drop the middle axis afterwards.
        rotated = cv2.transform(coords[:, np.newaxis, :], self.rm_coords)
        return rotated[:, 0, :]

    def apply_segmentation(self, segmentation):
        """
        Rotate a segmentation map using nearest-neighbour interpolation.
        """
        return self.apply_image(segmentation, interp=cv2.INTER_NEAREST)

    def create_rotation_matrix(self, offset=0):
        """
        Build the 2x3 affine matrix for this rotation.

        Args:
            offset: value added to both coordinates of the rotation center
                (and of the image centers when ``expand`` is set).
        """
        shifted_center = (self.center[0] + offset, self.center[1] + offset)
        rm = cv2.getRotationMatrix2D(shifted_center, self.angle, 1)
        if not self.expand:
            return rm
        # Find the coordinates of the center of rotation in the new image
        # The only point for which we know the future coordinates is the center of the image
        src_center = self.image_center[None, None, :] + offset
        rot_im_center = cv2.transform(src_center, rm)[0, 0, :]
        new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center
        # shift the rotation center to the new coordinates
        rm[:, 2] += new_center
        return rm

    def inverse(self):
        """
        The inverse is to rotate it back with expand, and crop to get the original shape.
        """
        if not self.expand:  # Not possible to inverse if a part of the image is lost
            raise NotImplementedError()
        rotate_back = RotationTransform(
            self.bound_h, self.bound_w, -self.angle, expand=True, center=None, interp=self.interp
        )
        crop_x0 = (rotate_back.bound_w - self.w) // 2
        crop_y0 = (rotate_back.bound_h - self.h) // 2
        crop_back = CropTransform(crop_x0, crop_y0, self.w, self.h)
        return TransformList([rotate_back, crop_back])
class ColorTransform(Transform):
    """
    Generic wrapper for any photometric transforms.

    These transformations should only affect the color space and
    not the coordinate space of the image (e.g. annotation
    coordinates such as bounding boxes should not be changed)
    """

    def __init__(self, op):
        """
        Args:
            op (Callable): operation to be applied to the image,
                which takes in an ndarray and returns an ndarray.

        Raises:
            ValueError: if ``op`` is not callable.
        """
        if not callable(op):
            raise ValueError("op parameter should be callable")
        super().__init__()
        # Passes ``op`` on by name; apply_image reads it back as ``self.op``.
        self._set_attributes(locals())

    def apply_image(self, img):
        """Run the wrapped operation on ``img`` and return its result."""
        result = self.op(img)
        return result

    def apply_segmentation(self, segmentation):
        """Segmentation maps are returned unchanged."""
        return segmentation

    def apply_coords(self, coords):
        """Coordinates are returned unchanged."""
        return coords

    def inverse(self):
        """Return a ``NoOpTransform``."""
        return NoOpTransform()
class PILColorTransform(ColorTransform):
    """
    Generic wrapper for PIL Photometric image transforms,
    which affect the color space and not the coordinate
    space of the image
    """

    def __init__(self, op):
        """
        Args:
            op (Callable): operation to be applied to the image,
                which takes in a PIL Image and returns a transformed
                PIL Image.
                For reference on possible operations see:
                - https://pillow.readthedocs.io/en/stable/

        Raises:
            ValueError: if ``op`` is not callable.
        """
        if not callable(op):
            raise ValueError("op parameter should be callable")
        super().__init__(op)

    def apply_image(self, img):
        """
        Convert ``img`` to a PIL Image, apply the wrapped operation through the
        parent class, and convert the result back with ``np.asarray``.
        """
        pil_input = Image.fromarray(img)
        pil_output = super().apply_image(pil_input)
        return np.asarray(pil_output)
def HFlip_rotated_box(transform, rotated_boxes):
    """
    Apply the horizontal flip transform on rotated boxes.

    The array is updated in place and the same object is returned.

    Args:
        transform: object exposing the image width as ``transform.width``.
        rotated_boxes (ndarray): Nx5 floating point array of
            (x_center, y_center, width, height, angle_degrees) format
            in absolute coordinates.
    """
    x_center = rotated_boxes[:, 0]
    angle = rotated_boxes[:, 4]
    # Mirror the center about the vertical axis of the image ...
    rotated_boxes[:, 0] = transform.width - x_center
    # ... and reverse the direction of rotation.
    rotated_boxes[:, 4] = -angle
    return rotated_boxes
def Resize_rotated_box(transform, rotated_boxes):
    """
    Apply the resizing transform on rotated boxes. For details of how these (approximation)
    formulas are derived, please refer to :meth:`RotatedBoxes.scale`.

    The array is updated in place and the same object is returned.

    Args:
        transform: object exposing ``w``, ``h``, ``new_w`` and ``new_h``.
        rotated_boxes (ndarray): Nx5 floating point array of
            (x_center, y_center, width, height, angle_degrees) format
            in absolute coordinates.
    """
    sx = transform.new_w * 1.0 / transform.w
    sy = transform.new_h * 1.0 / transform.h

    # Centers scale independently along each axis.
    rotated_boxes[:, 0] *= sx
    rotated_boxes[:, 1] *= sy

    angle_rad = rotated_boxes[:, 4] * np.pi / 180.0
    cos_a = np.cos(angle_rad)
    sin_a = np.sin(angle_rad)

    # Side lengths and angle depend on how the box axes project onto x and y.
    width_factor = np.sqrt(np.square(sx * cos_a) + np.square(sy * sin_a))
    height_factor = np.sqrt(np.square(sx * sin_a) + np.square(sy * cos_a))
    rotated_boxes[:, 2] *= width_factor
    rotated_boxes[:, 3] *= height_factor
    rotated_boxes[:, 4] = np.arctan2(sx * sin_a, sy * cos_a) * 180 / np.pi
    return rotated_boxes
# Register handlers for the "rotated_box" data type on the transform classes.
# NOTE(review): presumably fvcore's `register_type` exposes these as
# `apply_rotated_box` on the respective classes - confirm against fvcore.
HFlipTransform.register_type("rotated_box", HFlip_rotated_box)
ResizeTransform.register_type("rotated_box", Resize_rotated_box)
# not necessary any more with latest fvcore
# (identity handler: the rotated boxes are returned unchanged)
NoOpTransform.register_type("rotated_box", lambda t, x: x)