Spaces:

Koi953215
/

DiffIR2VR

Running on Zero

App Files Files Community

DiffIR2VR / utils /image_utils.py

jimmycv07

first commit

1de8821 5 months ago

raw

history blame contribute delete

11.1 kB

	# python3.7
	"""Contains utility functions for image processing.

	The module is primarily built on `cv2`. Unlike `cv2`, however, all color images
	are assumed to use `RGB` channel order by default, and all gray-scale images
	are assumed to have shape [height, width, 1].
	"""

	import os
	import cv2
	import numpy as np

	# Lower-case file extensions (leading dot included) that are treated as
	# images by this module. GIFs are deliberately not part of the list.
	IMAGE_EXTENSIONS = (
	    '.bmp',
	    '.ppm',
	    '.pgm',
	    '.jpeg',
	    '.jpg',
	    '.jpe',
	    '.jp2',
	    '.png',
	    '.webp',
	    '.tiff',
	    '.tif',
	)

	def check_file_ext(filename, *ext_list):
	    """Tells whether `filename` ends with one of the given extensions.

	    The comparison is case-insensitive, and each extension may be given with
	    or without the leading dot (e.g. both `'png'` and `'.png'` are accepted).

	    NOTE: With no extensions given, the result is always `False`.

	    Args:
	        filename: Filename (or path) to check. Only its basename is examined.
	        *ext_list: Extensions to match against.

	    Returns:
	        `True` if the extension of `filename` matches any entry of
	            `ext_list`, otherwise `False`.
	    """
	    if not ext_list:
	        return False

	    # Normalize every candidate to the lower-case, dot-prefixed form.
	    candidates = {
	        (item if item.startswith('.') else '.' + item).lower()
	        for item in ext_list
	    }
	    _, suffix = os.path.splitext(os.path.basename(filename))
	    return suffix.lower() in candidates


	def _check_2d_image(image):
	    """Asserts that `image` is a valid 2D image.

	    The image must be a `np.ndarray` of dtype `uint8` with exactly three
	    dimensions, laid out as one of:

	    (1) (height, width, 1)  # gray-scale image.
	    (2) (height, width, 3)  # color image.
	    (3) (height, width, 4)  # color image with transparency (RGBA).

	    Raises:
	        AssertionError: If any of the requirements above is violated.
	    """
	    assert isinstance(image, np.ndarray)
	    assert image.dtype == np.uint8
	    assert image.ndim == 3
	    assert image.shape[2] in (1, 3, 4)


	def get_blank_image(height, width, channels=3, use_black=True):
	    """Creates a blank image that is either entirely black or entirely white.

	    NOTE: The returned image has dtype `uint8` and pixel range [0, 255].

	    Args:
	        height: Height of the returned image.
	        width: Width of the returned image.
	        channels: Number of channels. (default: 3)
	        use_black: Whether to return a black image (all zeros). If `False`,
	            a white image (all 255) is returned. (default: True)

	    Returns:
	        An array of shape (height, width, channels) with dtype `np.uint8`.
	    """
	    canvas = np.zeros((height, width, channels), dtype=np.uint8)
	    if not use_black:
	        canvas[...] = 255
	    return canvas


	def load_image(path):
	    """Reads an image from disk.

	    NOTE: Color images are returned with `RGB` (or `RGBA`) channel order, and
	    gray-scale images are returned with shape (height, width, 1).

	    Args:
	        path: Path to load the image from.

	    Returns:
	        The loaded image as a `np.ndarray`, or `None` if `cv2.imread()`
	            returns `None` for the given `path`.

	    Raises:
	        AssertionError: If the decoded image does not pass
	            `_check_2d_image()`.
	    """
	    decoded = cv2.imread(path, cv2.IMREAD_UNCHANGED)
	    if decoded is None:
	        return None

	    # A 2-dimensional result has no channel axis; append one so that the
	    # result always has three dimensions.
	    pixels = decoded[:, :, np.newaxis] if decoded.ndim == 2 else decoded
	    _check_2d_image(pixels)

	    # `cv2` uses BGR(A) channel order, while this module works with RGB(A).
	    num_channels = pixels.shape[2]
	    if num_channels == 4:
	        return cv2.cvtColor(pixels, cv2.COLOR_BGRA2RGBA)
	    if num_channels == 3:
	        return cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)
	    return pixels


	def save_image(path, image):
	    """Writes an image to disk.

	    NOTE: A color input image is assumed to use `RGB` (or `RGBA`) channel
	    order and pixel range [0, 255]. Nothing is written if `image` is `None`.

	    Args:
	        path: Path to save the image to.
	        image: Image to save.

	    Raises:
	        AssertionError: If `image` does not pass `_check_2d_image()`.
	    """
	    if image is None:
	        return

	    _check_2d_image(image)

	    # `cv2.imwrite()` expects BGR(A) channel order, so color images are
	    # converted first. Single-channel images are written as they are.
	    num_channels = image.shape[2]
	    if num_channels == 3:
	        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	    elif num_channels == 4:
	        image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)
	    cv2.imwrite(path, image)


	def resize_image(image, *args, **kwargs):
	    """Resizes image.

	    This is a thin wrapper of `cv2.resize()`: all positional and keyword
	    arguments after `image` are forwarded unchanged, e.g.
	    `resize_image(image, (width, height))` or
	    `resize_image(image, dsize=(width, height), interpolation=...)`.

	    NOTE: The channel order of the input image will not be changed.

	    Args:
	        image: Image to resize.
	        *args: Additional positional arguments, passed to `cv2.resize()`.
	        **kwargs: Additional keyword arguments, passed to `cv2.resize()`.

	    Returns:
	        An image with dtype `np.ndarray`, or `None` if `image` is `None`.

	    Raises:
	        AssertionError: If `image` does not pass `_check_2d_image()`.
	    """
	    if image is None:
	        return None

	    _check_2d_image(image)
	    resized = cv2.resize(image, *args, **kwargs)
	    if image.shape[2] == 1:
	        # Re-expand the channel dim that gets squeezed for gray-scale images,
	        # so that the output keeps the (height, width, 1) layout.
	        return resized[:, :, np.newaxis]
	    return resized


	def add_text_to_image(image,
	                      text='',
	                      position=None,
	                      font=cv2.FONT_HERSHEY_TRIPLEX,
	                      font_size=1.0,
	                      line_type=cv2.LINE_8,
	                      line_width=1,
	                      color=(255, 255, 255)):
	    """Overlays text on given image.

	    The text is drawn with `cv2.putText()` directly onto the array passed in,
	    and that same array object is returned.

	    NOTE: The input image is assumed to be with `RGB` channel order.

	    Args:
	        image: The image to overlay text on.
	        text: Text content to overlay on the image. (default: empty)
	        position: Target position, as `(x, y)`, of the bottom-left corner of
	            the text. If not set, the center of the image will be used.
	            (default: None)
	        font: Font of the text added. (default: cv2.FONT_HERSHEY_TRIPLEX)
	        font_size: Font size of the text added. (default: 1.0)
	        line_type: Line type used to depict the text. (default: cv2.LINE_8)
	        line_width: Line width used to depict the text. (default: 1)
	        color: Color of the text added in `RGB` channel order. (default:
	            (255, 255, 255))

	    Returns:
	        An image with target text overlaid on. If `image` is `None` or `text`
	            is empty, `image` is returned untouched.

	    Raises:
	        AssertionError: If `image` does not pass `_check_2d_image()`.
	    """
	    if image is None or not text:
	        return image

	    _check_2d_image(image)
	    if position is None:
	        # Honor the documented default. Without this, `None` would be handed
	        # to `cv2.putText()` as `org`, which is not a valid point.
	        position = (image.shape[1] // 2, image.shape[0] // 2)

	    cv2.putText(img=image,
	                text=text,
	                org=position,
	                fontFace=font,
	                fontScale=font_size,
	                color=color,
	                thickness=line_width,
	                lineType=line_type,
	                bottomLeftOrigin=False)
	    return image


	def preprocess_image(image, min_val=-1.0, max_val=1.0):
	    """Maps pixel values from [0, 255] to [min_val, max_val] in `NCHW` layout.

	    This function is typically used to prepare an image, or a batch of
	    images, as the input of a deep model.

	    NOTE: The input image is assumed to be with pixel range [0, 255] and with
	    format `HWC` or `NHWC`. A single `HWC` image is treated as a batch of
	    size 1, so the returned array is always with format `NCHW`. The returned
	    array has dtype `float64`.

	    Args:
	        image: The input image for pre-processing.
	        min_val: Minimum value of the output image. (default: -1.0)
	        max_val: Maximum value of the output image. (default: 1.0)

	    Returns:
	        The pre-processed image.

	    Raises:
	        AssertionError: If `image` is not a `np.ndarray`, or if it is not
	            3- or 4-dimensional with 1, 3, or 4 channels in the last axis.
	    """
	    assert isinstance(image, np.ndarray)

	    value_range = max_val - min_val
	    scaled = image.astype(np.float64) / 255.0 * value_range + min_val

	    # Promote a single image to a batch with one entry.
	    batch = scaled[None] if scaled.ndim == 3 else scaled
	    assert batch.ndim == 4 and batch.shape[3] in (1, 3, 4)
	    return batch.transpose((0, 3, 1, 2))


	def postprocess_image(image, min_val=-1.0, max_val=1.0):
	    """Maps values from [min_val, max_val] to `uint8` pixels in [0, 255].

	    This function is typically used to turn the output of a deep model back
	    into displayable images. Values outside the expected range are clipped.

	    NOTE: The input image is assumed to be with format `NCHW`, and the
	    returned image will always be with format `NHWC`.

	    Args:
	        image: The input image for post-processing.
	        min_val: Expected minimum value of the input image. (default: -1.0)
	        max_val: Expected maximum value of the input image. (default: 1.0)

	    Returns:
	        The post-processed image.

	    Raises:
	        AssertionError: If `image` is not a `np.ndarray`, or if it is not
	            4-dimensional with 1, 3, or 4 channels in the second axis.
	    """
	    assert isinstance(image, np.ndarray)

	    rescaled = (image.astype(np.float64) - min_val) / (max_val - min_val) * 255
	    # Adding 0.5 before the truncating cast rounds to the nearest integer.
	    pixels = np.clip(rescaled + 0.5, 0, 255).astype(np.uint8)

	    assert pixels.ndim == 4 and pixels.shape[1] in (1, 3, 4)
	    return pixels.transpose((0, 2, 3, 1))


	def parse_image_size(obj):
	    """Parses an object to a pair of image size, i.e., (height, width).

	    Supported inputs are:

	    (1) `None` or an empty string, parsed as (0, 0).
	    (2) An integer (Python or NumPy), used as both height and width.
	    (3) A comma-separated string such as `'256'` or `'256, 512'`.
	    (4) A list, tuple, or `np.ndarray` with at most two elements.

	    A single element is used as both height and width, an empty sequence is
	    parsed as (0, 0), and negative values are clamped to 0.

	    Args:
	        obj: The input object to parse image size from.

	    Returns:
	        A two-element tuple, indicating image height and width respectively.

	    Raises:
	        ValueError: If the input is of an unsupported type, if it contains
	            more than two elements, or if a string field is not an integer.
	    """
	    if obj is None:
	        return (0, 0)

	    # Dispatch on type BEFORE comparing with `''`. Comparing a `np.ndarray`
	    # with a string is an element-wise operation whose result cannot be used
	    # as a condition.
	    if isinstance(obj, str):
	        if obj == '':
	            return (0, 0)
	        numbers = tuple(map(int, obj.replace(' ', '').split(',')))
	    elif isinstance(obj, (int, np.integer)):
	        numbers = (obj,)
	    elif isinstance(obj, (list, tuple, np.ndarray)):
	        numbers = tuple(obj)
	    else:
	        raise ValueError(f'Invalid type of input: `{type(obj)}`!')

	    if len(numbers) > 2:
	        raise ValueError('At most two elements for image size.')

	    if len(numbers) == 0:
	        height = 0
	        width = 0
	    elif len(numbers) == 1:
	        height = int(numbers[0])
	        width = int(numbers[0])
	    else:
	        height = int(numbers[0])
	        width = int(numbers[1])

	    return (max(0, height), max(0, width))


	def get_grid_shape(size, height=0, width=0, is_portrait=False):
	    """Computes a (height, width) grid shape holding exactly `size` cells.

	    If `height` and/or `width` is given and is compatible with `size`, it is
	    respected. Otherwise, the grid is made as square as possible. For
	    example, `size = 16` gives `(4, 4)` and `size = 15` gives `(3, 5)`. For
	    such automatically chosen grids, `is_portrait` decides the orientation:
	    the height is no larger than the width if it is `False`, and no smaller
	    than the width if it is `True`.

	    Args:
	        size: Size (height * width) of the target grid.
	        height: Expected height. If `size % height != 0`, this field will be
	            ignored. (default: 0)
	        width: Expected width. If `size % width != 0`, this field will be
	            ignored. (default: 0)
	        is_portrait: Whether to return a portrait size or a landscape size.
	            Only used when the shape is chosen automatically.
	            (default: False)

	    Returns:
	        A two-element tuple, representing height and width respectively.
	            `(0, 0)` is returned if `size` is not positive.
	    """
	    assert isinstance(size, int)
	    assert isinstance(height, int)
	    assert isinstance(width, int)
	    if size <= 0:
	        return (0, 0)

	    use_height = height > 0
	    use_width = width > 0
	    if use_height and use_width:
	        if height * width == size:
	            return (height, width)
	        # The two hints contradict `size`, hence neither is trusted.
	        use_height = False
	        use_width = False
	    if use_height and size % height == 0:
	        return (height, size // height)
	    if use_width and size % width == 0:
	        return (size // width, width)

	    # Search downwards from sqrt(size) for the largest divisor, which gives
	    # the most square-like grid. The search always stops, at 1 the latest.
	    short_side = int(np.sqrt(size))
	    while size % short_side != 0:
	        short_side -= 1
	    long_side = size // short_side

	    if is_portrait:
	        return (long_side, short_side)
	    return (short_side, long_side)


	def list_images_from_dir(directory):
	    """Collects the paths of all image files directly under `directory`.

	    A file counts as an image if its extension is one of
	    `IMAGE_EXTENSIONS`.

	    NOTE: Sub-directories are NOT searched.

	    Args:
	        directory: The directory to find images from.

	    Returns:
	        A sorted list of filenames, each prefixed with `directory`.
	    """
	    return sorted(
	        os.path.join(directory, entry)
	        for entry in os.listdir(directory)
	        if check_file_ext(entry, *IMAGE_EXTENSIONS)
	    )