# Utilities for overlaying saliency heatmaps onto frames and assembling them into a video.
import cv2 | |
import os | |
import glob | |
import numpy as np | |
from datetime import datetime | |
def add_transparent_image(
    background, foreground, alpha_factor=1.0, x_offset=None, y_offset=None
):
    """
    Alpha-blend a 4-channel foreground image onto a 3-channel background.

    The background is modified in place and also returned. The foreground is
    centered by default; offsets may place it partially (or fully) outside the
    background, in which case only the overlapping region is blended.

    Function sourced from StackOverflow contributor Ben.
    This function was found on StackOverflow and is the work of Ben, a contributor
    to the community. We are thankful for Ben's assistance by providing this useful
    method.
    Original Source:
    https://stackoverflow.com/questions/40895785/
    using-opencv-to-overlay-transparent-image-onto-another-image

    Args:
        background: HxWx3 image array.
        foreground: hxwx4 image array (color + alpha channel).
        alpha_factor: extra multiplier applied to the foreground alpha;
            the effective alpha is clamped to [0, 1].
        x_offset, y_offset: top-left placement of the foreground on the
            background; None centers along that axis. May be negative.

    Returns:
        The background array (blended in place).
    """
    bg_h, bg_w, bg_channels = background.shape
    fg_h, fg_w, fg_channels = foreground.shape
    assert (
        bg_channels == 3
    ), f"background image should have exactly 3 channels (RGB). found:{bg_channels}"
    assert (
        fg_channels == 4
    ), f"foreground image should have exactly 4 channels (RGBA). found:{fg_channels}"
    # center by default
    if x_offset is None:
        x_offset = (bg_w - fg_w) // 2
    if y_offset is None:
        y_offset = (bg_h - fg_h) // 2
    w = min(fg_w, bg_w, fg_w + x_offset, bg_w - x_offset)
    h = min(fg_h, bg_h, fg_h + y_offset, bg_h - y_offset)
    if w < 1 or h < 1:
        # No overlap: return the untouched background instead of None so the
        # function has a consistent return type (original returned None here).
        return background
    # clip foreground and background images to the overlapping regions
    bg_x = max(0, x_offset)
    bg_y = max(0, y_offset)
    fg_x = max(0, x_offset * -1)
    fg_y = max(0, y_offset * -1)
    foreground = foreground[fg_y : fg_y + h, fg_x : fg_x + w]
    background_subsection = background[bg_y : bg_y + h, bg_x : bg_x + w]
    # separate alpha and color channels from the foreground image
    foreground_colors = foreground[:, :, :3]
    # NOTE(review): swaps the foreground's first and third channels before
    # blending — presumably to reconcile an RGB foreground with a BGR
    # background (or vice versa); confirm against the callers' color order.
    foreground_colors = cv2.cvtColor(foreground_colors, cv2.COLOR_BGR2RGB)
    # 0-255 => 0.0-1.0; clamp so alpha_factor > 1 cannot push alpha past 1.0
    alpha_channel = np.clip(foreground[:, :, 3] / 255 * alpha_factor, 0.0, 1.0)
    # construct an alpha_mask that matches the image shape
    alpha_mask = np.dstack((alpha_channel, alpha_channel, alpha_channel))
    # combine the background with the overlay image weighted by alpha
    composite = (
        background_subsection * (1 - alpha_mask) + foreground_colors * alpha_mask
    )
    # overwrite the section of the background image that has been updated
    background[bg_y : bg_y + h, bg_x : bg_x + w] = composite
    return background
def convert_tensor_to_rgba_image(tensor):
    """
    Convert a 2-D saliency tensor into a 4-channel (color + alpha) heatmap image.

    The tensor is normalized to 0-255, colorized with OpenCV's JET colormap,
    and pixels with no saliency (which JET maps to [128, 0, 0]) are made fully
    transparent.

    Args:
        tensor: saliency tensor (moved to CPU here; assumed 2-D — TODO confirm).

    Returns:
        HxWx4 uint8 array: JET-colorized saliency with an alpha channel.
    """
    saliency_array = tensor.cpu().numpy()
    # Normalize image to 0-255
    if saliency_array.dtype != np.uint8:
        peak = saliency_array.max()
        if peak > 0:
            saliency_array = (255 * saliency_array / peak).astype(np.uint8)
        else:
            # All-zero saliency: the original divided by max() here, producing
            # NaN/inf and an undefined uint8 cast. Just cast instead.
            saliency_array = saliency_array.astype(np.uint8)
    heatmap = cv2.applyColorMap(saliency_array, cv2.COLORMAP_JET)
    # Pixels are transparent where no saliency; [128, 0, 0] is black in COLORMAP_JET
    alpha_channel = np.ones(heatmap.shape[:2], dtype=heatmap.dtype) * 255
    black_pixels_mask = np.all(heatmap == [128, 0, 0], axis=-1)
    alpha_channel[black_pixels_mask] = 0
    # Combine the color and alpha channels
    saliency_rgba = cv2.merge((heatmap, alpha_channel))
    return saliency_rgba
def convert_rgb_to_rgba_image(image):
    """
    Add a fully opaque alpha channel to an RGB image, converting it to
    OpenCV's BGR channel order in the process.

    Args:
        image: HxWx3 RGB image array.

    Returns:
        HxWx4 array: BGR color channels plus an all-255 alpha channel.
    """
    opaque_alpha = np.full(image.shape[:2], 255, dtype=image.dtype)
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return cv2.merge((bgr_image, opaque_alpha))
def label_frame(image, token):
    """
    Draw `token` in the top-left corner of `image` over a filled black box.

    The image is modified in place and also returned.

    Args:
        image: frame to annotate (OpenCV image array).
        token: text to render.

    Returns:
        The annotated image.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    thickness = 1
    white = (255, 255, 255)
    black = (0, 0, 0)

    (text_w, text_h), _ = cv2.getTextSize(token, font, font_scale, thickness)

    # Filled backdrop: 5px of padding beyond the measured text extent.
    top_left = (10, 10)
    bottom_right = (10 + text_w + 5, 10 + text_h + 5)
    cv2.rectangle(image, top_left, bottom_right, black, -1)

    # Baseline sits text_h below the box's top edge.
    cv2.putText(image, token, (10, 10 + text_h), font, font_scale, white, thickness)
    return image
def saliency_video(path, sequence):
    """
    Assemble the PNG frames found in `path` into `path`/saliency.mp4 at 5 fps.

    Frames are ordered by file creation time, and at most len(sequence) frames
    are written (the zip below truncates to the shorter of the two).

    Args:
        path: directory containing the *.png frames; the video is written here.
        sequence: iterable paired with the frames; its items are currently
            unused — presumably intended for label_frame(). TODO confirm.

    Raises:
        FileNotFoundError: if `path` contains no .png files (the original
            crashed with an opaque IndexError in this case).
    """
    image_files = sorted(glob.glob(os.path.join(path, "*.png")), key=os.path.getctime)
    if not image_files:
        raise FileNotFoundError(f"No .png frames found in {path}")
    # Video dimensions come from the first frame; note cv2.VideoWriter wants
    # (width, height) while shape is (height, width, ...).
    first_frame = cv2.imread(image_files[0])
    height, width = first_frame.shape[:2]
    # Create a VideoWriter object to save the video
    video_name = os.path.join(path, "saliency.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video = cv2.VideoWriter(video_name, fourcc, 5, (width, height))
    try:
        for image_file, _token in zip(image_files, sequence):
            video.write(cv2.imread(image_file))
    finally:
        # Always release so a partial file is flushed even if a write fails.
        video.release()
    print(f"Video saved as {video_name}")