Spaces:

Flitto
/

image_cut_rect

Running

image_cut_rect / data_utils /image_utils.py

HERIUN

add files

6a07cb2 2 months ago

43.9 kB

	# References
	# https://sashamaps.net/docs/resources/20-colors/

	import numpy as np
	import cv2
	from scipy import ndimage as ndi
	from PIL import Image, ImageDraw, ImageCms, ExifTags, ImageEnhance
	import requests
	from pathlib import Path
	import pandas as pd
	from scipy.sparse import coo_matrix
	from skimage.feature import peak_local_max
	from skimage.morphology import local_maxima
	from skimage.segmentation import watershed
	from moviepy.video.io.bindings import mplfig_to_npimage
	import io
	import os
	from enum import Enum


	# Fixed palette of 20 RGB triples used when painting label maps and cluster
	# overlays (see the reference link in the header comment of this file).
	COLORS = (
	    (230, 25, 75), (60, 180, 75), (255, 255, 25), (0, 130, 200),
	    (245, 130, 48), (145, 30, 180), (70, 240, 250), (240, 50, 230),
	    (210, 255, 60), (250, 190, 212), (0, 128, 128), (220, 190, 255),
	    (170, 110, 40), (255, 250, 200), (128, 0, 0), (170, 255, 195),
	    (128, 128, 0), (255, 215, 180), (0, 0, 128), (128, 128, 128),
	)


	class PC_TYPE(Enum):
	    """Closed set of detector kinds.

	    NOTE(review): member names match common OpenCV feature detectors; how
	    they are consumed is not visible in this part of the file.
	    """

	    HARRIS = 1
	    EDGES_CONTOURS = 2
	    GFTT = 3
	    FAST = 4
	    KAZE = 5


	def _to_2d(img):
	# it use just first channel. if you want rgb2gray, use _to_grayscale
	if img.ndim == 3:
	return img[:, :, 0]
	else:
	return img


	def _to_3d(img):
	if img.ndim == 2:
	return np.dstack([img, img, img])
	else:
	return img


	def _to_byte(img: Image, format) -> bytes:
	    """Encode ``img`` with its ``save`` method in ``format`` and return the bytes."""
	    # The image is written to an in-memory, file-like buffer instead of disk.
	    with io.BytesIO() as buffer:
	        img.save(buffer, format=format)
	        return buffer.getvalue()


	def _get_width_and_height(img):
	if img.ndim == 2:
	h, w = img.shape
	else:
	h, w, _ = img.shape
	return w, h


	def _get_resolution(img):
	    """Return the pixel count (width * height) of ``img``."""
	    width, height = _get_width_and_height(img)
	    return width * height


	def _to_pil(img):
	    """Return ``img`` as a PIL image; non-PIL input is wrapped as mode "RGB"."""
	    if isinstance(img, Image.Image):
	        return img
	    return Image.fromarray(img, mode="RGB")


	def _to_array(img):
	img = np.array(img)
	return img


	def _bool_to_uint8(img):
	uint8 = img.astype("uint8")
	if (
	np.array_equal(np.unique(uint8), np.array([0, 1]))
	or np.array_equal(np.unique(uint8), np.array([0]))
	or np.array_equal(np.unique(uint8), np.array([1]))
	):
	return uint8 * 255
	else:
	return uint8


	def _figure_to_array(fig):
	    """Convert ``fig`` to an array using moviepy's ``mplfig_to_npimage``."""
	    return mplfig_to_npimage(fig)


	def _preprocess_image(img):
	    """Turn an arbitrary array into something suitable for display or saving.

	    * int32 arrays are painted with ``_repaint_segmentation_map``.
	    * bool arrays become uint8 0/255.
	    * 2-D arrays holding only 0 and/or 255 are stacked to three channels;
	      any other 2-D array is rendered with the jet colormap.
	    """
	    if img.dtype == "int32":
	        img = _repaint_segmentation_map(img)

	    if img.dtype == "bool":
	        img = img.astype("uint8") * 255

	    if img.ndim != 2:
	        return img

	    levels = np.unique(img).tolist()
	    if levels in ([0, 255], [0], [255]):
	        return _to_3d(img)
	    return _apply_jet_colormap(img)


	def _blend_two_images(img1, img2, alpha=0.5):
	    """Blend two images with ``Image.blend`` and return the result as an array."""
	    first, second = _to_pil(img1), _to_pil(img2)
	    return _to_array(Image.blend(im1=first, im2=second, alpha=alpha))


	def _repaint_segmentation_map(seg_map):
	    """Colour a label map: label ``k`` gets ``COLORS[k % len(COLORS)]``, label 0 black.

	    Returns the three uint8 channels stacked with ``np.dstack``.
	    """
	    color_slot = seg_map % len(COLORS)
	    background = seg_map == 0
	    channels = [
	        _get_canvas_same_size_as_image(seg_map, black=True) for _ in range(3)
	    ]

	    for slot, rgb in enumerate(COLORS):
	        selected = color_slot == slot
	        for channel, value in zip(channels, rgb):
	            channel[selected] = value

	    # Label 0 shares slot 0 with multiples of len(COLORS); force it to black.
	    for channel in channels:
	        channel[background] = 0

	    return np.dstack(channels)


	def _get_canvas_same_size_as_image(img, black=False):
	if black:
	return np.zeros_like(img).astype("uint8")
	else:
	return (np.ones_like(img) * 255).astype("uint8")


	def _get_canvas(w, h, black=False):
	if black:
	return np.zeros((h, w, 3)).astype("uint8")
	else:
	return (np.ones((h, w, 3)) * 255).astype("uint8")


	def _invert_image(mask):
	    """Return the bitwise NOT of ``mask`` after casting it to uint8."""
	    as_uint8 = mask.astype("uint8")
	    return cv2.bitwise_not(as_uint8)


	def _to_grayscale(img):
	    """Convert ``img`` to grayscale with ``cv2.COLOR_RGB2GRAY``."""
	    return cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)


	def _erode_mask(mask, kernel_size=3):
	    """Erode ``mask`` with a square ``kernel_size`` x ``kernel_size`` element.

	    bool masks are converted to uint8 0/255 first.
	    """
	    if mask.dtype == "bool":
	        mask = mask.astype("uint8") * 255
	    element = cv2.getStructuringElement(
	        shape=cv2.MORPH_RECT, ksize=(kernel_size,) * 2
	    )
	    return cv2.erode(src=mask, kernel=element)


	def _dilate_mask(mask, kernel_size=3):
	    """Dilate ``mask`` with a square ``kernel_size`` x ``kernel_size`` element.

	    ``kernel_size == 0`` returns ``mask`` untouched; bool masks are converted
	    to uint8 0/255 before dilation.
	    """
	    if kernel_size == 0:
	        return mask
	    if mask.dtype == "bool":
	        mask = mask.astype("uint8") * 255
	    element = cv2.getStructuringElement(
	        shape=cv2.MORPH_RECT, ksize=(kernel_size,) * 2
	    )
	    return cv2.dilate(src=mask, kernel=element)


	def _gaussian_blur_mask(mask, kernel_size=5):
	    """Gaussian-blur ``mask`` and re-binarise it to uint8 0/255.

	    Every pixel that is non-zero after blurring becomes 255.
	    """
	    blurred = cv2.GaussianBlur(
	        src=mask, ksize=(kernel_size, kernel_size), sigmaX=0
	    )
	    touched = blurred != 0
	    return touched.astype("uint8") * 255


	def _blur(img, v=0.04):
	    """Gaussian-blur ``img`` with a kernel sized as fraction ``v`` of its shorter side."""
	    width, height = _get_width_and_height(img)
	    raw_size = round(min(width, height) * v)
	    # Force the kernel size to be odd.
	    odd_size = raw_size // 2 * 2 + 1
	    return cv2.GaussianBlur(
	        src=img.copy(order="C"), ksize=(odd_size, odd_size), sigmaX=0
	    )


	def _get_adaptive_thresholded_image(img, invert=False, block_size=3):
	    """Mean adaptive threshold of the grayscale version of an RGB image.

	    ``invert`` switches from ``THRESH_BINARY`` to ``THRESH_BINARY_INV``.
	    """
	    gray = cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)
	    mode = cv2.THRESH_BINARY_INV if invert else cv2.THRESH_BINARY
	    return cv2.adaptiveThreshold(
	        src=gray,
	        maxValue=255,
	        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
	        thresholdType=mode,
	        blockSize=block_size,
	        C=0,
	    )


	def _make_segmentation_map_rectangle(seg_map):
	seg_map_copied = seg_map.copy(order="C")
	for idx in range(1, np.max(seg_map_copied) + 1):
	seg_map_sub = seg_map_copied == idx
	nonzero_x = np.where((seg_map_sub != 0).any(axis=0))[0]
	nonzero_y = np.where((seg_map_sub != 0).any(axis=1))[0]
	if nonzero_x.size != 0 and nonzero_y.size != 0:
	seg_map_copied[
	nonzero_y[0] : nonzero_y[-1], nonzero_x[0] : nonzero_x[-1]
	] = idx
	return seg_map_copied


	def _apply_jet_colormap(img):
	    """Apply OpenCV's JET colormap to the inverted input (``255 - img``)."""
	    inverted = 255 - img
	    return cv2.applyColorMap(src=inverted, colormap=cv2.COLORMAP_JET)


	def _reverse_jet_colormap(img):
	    """Map a jet-coloured image back to the gray values that produced it.

	    A colour -> gray lookup is built by pushing all 256 gray levels through
	    ``_apply_jet_colormap``; each pixel is then looked up by its exact
	    colour triple. A colour missing from the table raises ``KeyError``.
	    """
	    grays = np.arange(256, dtype=np.uint8)
	    palette = _apply_jet_colormap(grays).reshape(256, 3)
	    gray_of_color = {tuple(color): gray for color, gray in zip(palette, grays)}

	    def _lookup(pixel):
	        return gray_of_color[tuple(pixel)]

	    return np.apply_along_axis(_lookup, axis=2, arr=img)


	def _get_pixel_counts(arr, sort=False, include_zero=False):
	unique, cnts = np.unique(arr, return_counts=True)
	idx2cnt = dict(zip(unique, cnts))

	if not include_zero:
	if 0 in idx2cnt:
	idx2cnt.pop(0)

	if not sort:
	return idx2cnt
	else:
	return dict(sorted(idx2cnt.items(), key=lambda x: x[1], reverse=True))


	def _combine_masks(masks):
	    """Return the element-wise maximum of all ``masks`` as a 3-channel array.

	    The accumulator starts as a black canvas shaped like ``masks[0]``.
	    """
	    merged = _get_canvas_same_size_as_image(img=masks[0], black=True)
	    for current in masks:
	        merged = np.maximum(_to_3d(merged), _to_3d(current))
	    return merged


	def _get_local_maxima_coordinates(region_score_map, region_seg_map=None, th=150):
	    """Return local maxima of ``region_score_map`` as rows of ``(x, y)``.

	    When ``region_seg_map`` is not given, a label map is derived by
	    thresholding the score map at ``th`` and labelling its 4-connected
	    components. ``peak_local_max`` is run with a minimum distance of 5
	    and at most 24 peaks per label.
	    """
	    # Original author's note: 150 works better when `src_lang="ja"`.
	    labels = region_seg_map
	    if labels is None:
	        _, binary = cv2.threshold(
	            src=region_score_map, thresh=th, maxval=255, type=cv2.THRESH_BINARY
	        )
	        _, labels = cv2.connectedComponents(image=binary, connectivity=4)

	    peaks_yx = peak_local_max(
	        image=region_score_map,
	        min_distance=5,
	        labels=labels,
	        num_peaks_per_label=24,
	    )
	    return peaks_yx[:, ::-1]  # (row, col) -> (x, y)


	def _get_local_maxima_array(region_score_map, region_seg_map=None, th=150):
	    """Return a boolean array that is True at each local maximum.

	    Args:
	        region_score_map: 2-D score map searched for peaks.
	        region_seg_map: Optional label map forwarded to
	            ``_get_local_maxima_coordinates`` (it used to be silently
	            replaced by ``None``).
	        th: Threshold used to derive a label map when none is given.

	    Returns:
	        bool array with the same shape as ``region_score_map``.
	    """
	    peaks_xy = _get_local_maxima_coordinates(
	        region_score_map, region_seg_map=region_seg_map, th=th
	    )

	    peak_mask = np.zeros(region_score_map.shape, dtype=bool)
	    # Coordinates come back as (x, y); array indexing is [row, col].
	    peak_mask[peaks_xy[:, 1], peaks_xy[:, 0]] = True
	    return peak_mask


	def _mask_image(img, mask, invert=False):
	    """Keep only the part of ``img`` covered by ``mask``.

	    Args:
	        img (PIL image or np.ndarray): Source image.
	        mask (PIL image or np.ndarray): Mask; a 3-D mask is reduced to its
	            first channel.
	        invert (bool, optional): Use the inverted mask instead.

	    Returns:
	        np.ndarray: ``img`` AND-ed with itself under the mask.
	    """
	    image = _to_array(img)
	    mask2d = _to_2d(_to_array(mask))
	    if invert:
	        mask2d = _invert_image(mask2d)
	    return cv2.bitwise_and(src1=image, src2=image, mask=mask2d.astype("uint8"))


	def _ignore_small_regions_in_mask(mask, area_thresh=10):
	    """Remove 4-connected components smaller than ``area_thresh`` pixels.

	    Args:
	        mask: 2-D or 3-D mask; a 3-D mask is reduced to its first channel.
	        area_thresh: Minimum component area (in pixels) to keep.

	    Returns:
	        3-channel uint8 mask (0/255) containing only the kept components.
	    """
	    mask2d = _to_2d(mask)

	    _, labels, stats, _ = cv2.connectedComponentsWithStats(
	        mask2d.astype("uint8"), connectivity=4
	    )
	    large_enough = np.flatnonzero(stats[:, cv2.CC_STAT_AREA] >= area_thresh)
	    # Label 0 is the background. Exclude it by value, not by position: a
	    # background smaller than area_thresh is absent from `large_enough`,
	    # and dropping the first entry would then discard a real component.
	    kept_labels = large_enough[large_enough != 0]

	    kept = np.isin(labels, kept_labels)
	    return _to_3d(kept.astype("uint8") * 255)


	def _crop_image(img, l, t, r, b):
	    """Crop ``img`` to the box (l, t, r, b), clamped to the image bounds."""
	    width, height = _get_width_and_height(img)
	    top, bottom = int(max(0, t)), int(min(height, b))
	    left, right = int(max(0, l)), int(min(width, r))
	    return img[top:bottom, left:right, ...]


	def _bboxes_to_mask(img, bboxes):
	    """Paint every bbox row of ``bboxes`` as 255 on a black canvas shaped like ``img``.

	    Rows are read through ``bbox_x1``/``bbox_y1``/``bbox_x2``/``bbox_y2``.
	    """
	    mask = _get_canvas_same_size_as_image(img=img, black=True)
	    for box in bboxes.itertuples():
	        rows = slice(box.bbox_y1, box.bbox_y2)
	        cols = slice(box.bbox_x1, box.bbox_x2)
	        mask[rows, cols] = 255
	    return _to_3d(mask)


	def _apply_watershed(mask, region_score_map, th=150):
	    """Run a watershed over the negated score map, seeded at its local maxima.

	    Seeds are the 4-connected components of the local-maxima array; the
	    flooding is restricted to the 2-D version of ``mask``.
	    """
	    peaks = _get_local_maxima_array(region_score_map, th=th)
	    _, seeds = cv2.connectedComponents(
	        image=peaks.astype("uint8"), connectivity=4
	    )
	    return watershed(image=-region_score_map, markers=seeds, mask=_to_2d(mask))


	def _perform_watershed(score_map, score_thresh=80):
	    """Watershed ``score_map`` using its strong local maxima as seeds.

	    Scores below 190 are zeroed before searching for maxima. The flooded
	    area is where the score map exceeds ``score_thresh``.
	    """
	    strong_only = score_map.copy()
	    strong_only[strong_only < 190] = 0

	    seeds = local_maxima(image=strong_only, allow_borders=False)
	    _, seeds = cv2.connectedComponents(image=seeds.astype("int8"), connectivity=8)

	    _, region_mask = cv2.threshold(
	        src=score_map, thresh=score_thresh, maxval=255, type=cv2.THRESH_BINARY
	    )
	    return watershed(image=-score_map, markers=seeds, mask=_to_2d(region_mask))


	def _get_region_segmentation_map(region_score_map, region_thresh=30):
	    """Threshold the score map at ``region_thresh`` and watershed inside that mask."""
	    _, binary = cv2.threshold(
	        src=region_score_map,
	        thresh=region_thresh,
	        maxval=255,
	        type=cv2.THRESH_BINARY,
	    )
	    return _apply_watershed(region_score_map=region_score_map, mask=binary)


	def _combine_two_segmentation_maps(seg_map1, seg_map2):
	    """Merge two label maps into one, then blank the most frequent label.

	    Non-zero labels of ``seg_map2`` are shifted by the number of distinct
	    values in ``seg_map1`` minus one before being added to ``seg_map1``.
	    The value with the largest pixel count in the merged map is set to 0.
	    """
	    offset = len(np.unique(seg_map1)) - 1
	    shifted_second = _mask_image(img=seg_map2 + offset, mask=(seg_map2 != 0))
	    merged = seg_map1 + shifted_second

	    counts = _get_pixel_counts(merged, sort=True, include_zero=True)
	    dominant = list(counts)[0]
	    return _mask_image(img=merged, mask=(merged != dominant))


	def _get_image_segmentation_map(img, region_score_map=None, block_size=3):
	    """Segment ``img`` from its adaptive-threshold components.

	    The image is adaptively thresholded twice (normal and inverted), the
	    4-connected components of both results are labelled, and the two label
	    maps are merged with ``_combine_two_segmentation_maps``.

	    Args:
	        img: RGB image.
	        region_score_map: Optional score map. When given, the image is first
	            restricted to the area where the score exceeds 20, dilated with
	            a 16x16 kernel.
	        block_size: Block size for the adaptive threshold.

	    Returns:
	        Integer label map.
	    """
	    if region_score_map is not None:
	        _, region_mask = cv2.threshold(
	            src=region_score_map, thresh=20, maxval=255, type=cv2.THRESH_BINARY
	        )
	        # _dilate_mask's parameter is `mask`; the old `img=` keyword raised
	        # TypeError.
	        region_mask = _dilate_mask(mask=region_mask, kernel_size=16)
	        img_masked = _mask_image(img=img, mask=region_mask)
	    else:
	        img_masked = img

	    img_thr1 = _get_adaptive_thresholded_image(
	        img=img_masked, invert=False, block_size=block_size
	    )
	    img_thr2 = _get_adaptive_thresholded_image(
	        img=img_masked, invert=True, block_size=block_size
	    )

	    _, seg_map1 = cv2.connectedComponents(image=img_thr1, connectivity=4)
	    _, seg_map2 = cv2.connectedComponents(image=img_thr2, connectivity=4)
	    return _combine_two_segmentation_maps(seg_map1=seg_map1, seg_map2=seg_map2)


	def _get_segmentation_map_overlapping_mask(seg_map, mask, overlap_thresh=0.6):
	    """Build a mask of the segments that lie mostly inside ``mask``.

	    A non-zero label is selected when the share of its pixels falling on
	    non-zero ``mask`` pixels is strictly greater than ``overlap_thresh``.

	    Returns:
	        3-channel uint8 mask (0/255) covering the selected labels.
	    """
	    total_counts = _get_pixel_counts(seg_map, sort=True, include_zero=False)

	    covered_seg_map = _mask_image(img=seg_map, mask=(mask != 0))
	    covered_counts = _get_pixel_counts(
	        covered_seg_map, sort=False, include_zero=False
	    )

	    selected_labels = [
	        label
	        for label, total in total_counts.items()
	        if covered_counts.get(label, 0) / total > overlap_thresh
	    ]

	    selected = np.isin(seg_map, selected_labels).astype("uint8")
	    return _to_3d(selected * 255)


	def _split_segmentation_map(seg_map, pccs):
	    """Split ``seg_map`` by whether a label is hit by an "inside" centre.

	    Labels found at the (y, x) positions of the ``pccs`` rows flagged
	    ``inside`` form the first map; every other pixel forms the second.

	    Returns:
	        ``(seg_map_hit, seg_map_rest)``.
	    """
	    inside_centers = pccs[pccs["inside"]]
	    labels_hit = inside_centers.apply(
	        lambda center: seg_map[center["y"], center["x"]], axis=1
	    ).values.tolist()

	    hit = np.isin(seg_map, labels_hit)
	    seg_map_hit = _mask_image(img=seg_map, mask=hit)
	    seg_map_rest = _mask_image(img=seg_map, mask=~hit)
	    return seg_map_hit, seg_map_rest


	def _segmentation_map_to_mask(seg_map):
	    """Return a 3-channel uint8 mask that is 255 wherever ``seg_map`` is non-zero."""
	    foreground = seg_map != 0
	    return _to_3d(foreground.astype("uint8") * 255)


	def _get_pseudo_character_centers_from_mask(mask, bboxes: pd.DataFrame = None):
	    """Compute the centre of each connected component (character) of a mask.

	    Args:
	        mask: Mask image; a 3-D mask is reduced to its first channel.
	        bboxes: Optional DataFrame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2``
	            and ``bbox_y2`` columns. ``None`` or an empty frame marks every
	            centre as inside.

	    Returns:
	        DataFrame with integer ``x`` and ``y`` columns (truncated centroids
	        of the 8-connected components, background excluded) and a boolean
	        ``inside`` column telling whether the centre lies strictly within
	        at least one bbox.
	    """
	    num_labels, _, _, centroids = cv2.connectedComponentsWithStats(
	        image=_to_2d(mask), connectivity=8
	    )
	    # Label 0 is the background, so start at 1.
	    center_coords = [
	        (int(centroids[i][0]), int(centroids[i][1])) for i in range(1, num_labels)
	    ]
	    pccs = pd.DataFrame(center_coords, columns=["x", "y"])

	    # The default is None, so guard before touching `.empty`.
	    if bboxes is not None and not bboxes.empty:
	        # Vectorised point-in-box test: centres along rows, boxes along columns.
	        xs = pccs["x"].values[:, None]
	        ys = pccs["y"].values[:, None]
	        pccs["inside"] = (
	            (xs > bboxes["bbox_x1"].values)
	            & (xs < bboxes["bbox_x2"].values)
	            & (ys > bboxes["bbox_y1"].values)
	            & (ys < bboxes["bbox_y2"].values)
	        ).any(axis=1)
	    else:
	        pccs["inside"] = True

	    return pccs


	def _get_pseudo_character_centers(
	    region_score_map, region_seg_map=None, bboxes=pd.DataFrame()
	):
	    """Locate score-map peaks and flag those lying inside a bbox.

	    Returns:
	        DataFrame with ``x``, ``y`` (peak coordinates) and a boolean
	        ``inside`` column; with an empty ``bboxes`` every peak is inside.
	    """
	    peaks_xy = _get_local_maxima_coordinates(
	        region_score_map, region_seg_map=region_seg_map
	    )
	    pccs = pd.DataFrame(peaks_xy, columns=["x", "y"])

	    if bboxes.empty:
	        pccs["inside"] = True
	        return pccs

	    # Vectorised point-in-box test: peaks along rows, boxes along columns.
	    xs = pccs["x"].values[:, None]
	    ys = pccs["y"].values[:, None]
	    within_x = (xs > bboxes["bbox_x1"].values) & (xs < bboxes["bbox_x2"].values)
	    within_y = (ys > bboxes["bbox_y1"].values) & (ys < bboxes["bbox_y2"].values)
	    pccs["inside"] = (within_x & within_y).any(axis=1)
	    return pccs


	def _convert_region_score_map_to_region_mask(region_score_map, region_score_thresh=170):
	    """Keep only the score-map regions whose peak reaches ``region_score_thresh``.

	    Regions are the 4-connected components of the score map thresholded at
	    30; a component is kept when its maximum score is not below
	    ``region_score_thresh``.

	    Returns:
	        3-channel uint8 mask (0/255).
	    """
	    _, binary = cv2.threshold(
	        src=region_score_map, thresh=30, maxval=255, type=cv2.THRESH_BINARY
	    )
	    kept = _get_canvas_same_size_as_image(img=binary, black=True)

	    n_labels, labels, _, _ = cv2.connectedComponentsWithStats(
	        image=_to_2d(binary), connectivity=4
	    )
	    for label in range(1, n_labels):
	        member = labels == label
	        if np.max(region_score_map[member]) < region_score_thresh:
	            continue
	        kept[member] = 255

	    return _to_3d(kept)


	def _split_mask(mask, region_score_map=None, bboxes=pd.DataFrame(), th=30):
	    """Split a mask into the part to erase and the part to restore.

	    The two returned masks stand for the regions to be removed and to be
	    recovered during inpainting, and they may overlap.

	    How it works: the score map (``region_score_map``, or the result of
	    ``get_dst_mask(mask)`` when it is not given) is segmented with
	    ``_apply_watershed``. Pseudo character centres are then computed from
	    the score map and that segmentation, and flagged by whether they fall
	    inside a bbox. Segments hit by an inside centre form the first mask;
	    the remaining segments form the second. Any part of the first mask
	    lying outside all bboxes is added to the second mask as well.

	    Args:
	        mask (np.ndarray): (H, W, 3) mask with values 0 or 255.
	        region_score_map (np.ndarray): Region score map (the CRAFT output,
	            a heat map emphasising character centres).
	        bboxes (pd.DataFrame): Frame with box coordinates
	            (bbox_x1, bbox_y1, bbox_x2, bbox_y2).
	        th: Threshold forwarded to ``_apply_watershed``.

	    Returns:
	        tuple of np.ndarray: ``mask1`` (to erase) and ``mask2`` (to restore).
	    """
	    if region_score_map is None:
	        score_map = _to_2d(get_dst_mask(mask))
	    else:
	        score_map = region_score_map

	    seg_map = _apply_watershed(mask=mask, region_score_map=score_map, th=th)
	    pccs = _get_pseudo_character_centers(
	        region_score_map=score_map, region_seg_map=seg_map, bboxes=bboxes
	    )

	    box_mask = _bboxes_to_mask(seg_map, bboxes)

	    erase_map, restore_map = _split_segmentation_map(seg_map=seg_map, pccs=pccs)
	    mask1 = _segmentation_map_to_mask(erase_map)
	    mask2 = _segmentation_map_to_mask(restore_map)
	    outside_boxes = _to_3d(_mask_image(mask1, box_mask, invert=True))
	    return mask1, _combine_masks([mask2, outside_boxes])


	def get_word_segmentation_map(region_score_map, affinity_score_map):
	    """Label word regions from the region and affinity score maps.

	    Both maps are thresholded at 70, the two binary masks are added, and
	    the 4-connected components of the sum are labelled.
	    """

	    def _binarize(score_map):
	        _, binary = cv2.threshold(
	            src=score_map, thresh=70, maxval=255, type=cv2.THRESH_BINARY
	        )
	        return binary

	    word_mask = _binarize(region_score_map) + _binarize(affinity_score_map)
	    _, word_labels = cv2.connectedComponents(image=word_mask, connectivity=4)
	    return word_labels


	def get_line_segmentation_map(line_score_map):
	    """Threshold the line score map at 130 and label its 4-connected components."""
	    _, binary = cv2.threshold(
	        src=line_score_map, thresh=130, maxval=255, type=cv2.THRESH_BINARY
	    )
	    _, line_labels = cv2.connectedComponents(image=binary, connectivity=4)
	    return line_labels


	def _get_3d_block_segmentation_map(img, bboxes):
	segmentation_map_block = np.zeros(
	shape=(img.shape[0], img.shape[1], len(bboxes) + 1)
	)
	for idx, (xmin, ymin, xmax, ymax) in enumerate(
	bboxes[["xmin", "ymin", "xmax", "ymax"]].values, start=1
	):
	segmentation_map_block[ymin:ymax, xmin:xmax, idx] = 255
	return segmentation_map_block


	def compare_images(img1, img2, flag=cv2.CMP_EQ):
	    """Compare two images element-wise with ``cv2.compare``.

	    Elements satisfying the comparison are 255, all others 0. ``flag`` is
	    one of the ``cv2.CMP_*`` constants (e.g. ``CMP_EQ`` for equal,
	    ``CMP_NE`` for different).
	    """
	    comparison = cv2.compare(img1, img2, flag)
	    return comparison


	def convert_webp_png_get_data(img: np.ndarray):
	    """Round-trip ``img`` through PNG and return its encoded bytes.

	    The image is converted to RGB, encoded as PNG, decoded again and
	    re-encoded as JPEG. These are the same bytes the previous
	    implementation obtained by saving "temp.png" and reading it back with
	    ``load_image``, which re-encodes local files as JPEG.

	    Everything happens in memory, so concurrent calls cannot clobber a
	    shared "temp.png" in the working directory and no file is left behind
	    when an error occurs.

	    Args:
	        img: Image array (or PIL image) accepted by ``_to_pil``.

	    Returns:
	        bytes: Encoded image data (JPEG, despite the function name).
	    """
	    rgb_img = _to_pil(img).convert("RGB")

	    png_buffer = io.BytesIO()
	    rgb_img.save(png_buffer, format="PNG")
	    png_buffer.seek(0)

	    reloaded = Image.open(png_buffer)
	    jpeg_buffer = io.BytesIO()
	    reloaded.save(jpeg_buffer, format="JPEG")
	    return jpeg_buffer.getvalue()


	def add_water_mark(original_img, water_mark_img_path):
	    """Paste a watermark onto the centre of ``original_img``.

	    The watermark is loaded as RGBA and its own alpha channel is used as
	    the paste mask. If it is wider or taller than the target image it is
	    first shrunk, keeping its aspect ratio, so that it fits.

	    Args:
	        original_img: ``np.ndarray`` or PIL image. A PIL image passed in is
	            not modified; the watermark is drawn on a copy.
	        water_mark_img_path: Path of the watermark image file.

	    Returns:
	        The RGB result: an ``np.ndarray`` when an array was passed in,
	        otherwise a PIL image.
	    """
	    return_np = isinstance(original_img, np.ndarray)
	    # Image.paste works in place, so never draw on the caller's PIL image.
	    base = _to_pil(original_img) if return_np else original_img.copy()
	    watermark = Image.open(water_mark_img_path).convert("RGBA")

	    width_o, height_o = base.size
	    width_wm, height_wm = watermark.size

	    # Resize only when the watermark exceeds the target in either dimension.
	    if width_wm > width_o or height_wm > height_o:
	        # Use the smaller of the two ratios so both dimensions fit.
	        ratio = min(width_o / width_wm, height_o / height_wm)
	        new_size = (int(width_wm * ratio), int(height_wm * ratio))
	        watermark = watermark.resize(new_size, Image.Resampling.LANCZOS)
	        width_wm, height_wm = watermark.size

	    position = ((width_o - width_wm) // 2, (height_o - height_wm) // 2)
	    base.paste(watermark, position, watermark)
	    rgb_image = base.convert("RGB")

	    if return_np:
	        return _to_array(rgb_image)
	    return rgb_image


	def load_image(url_or_path, with_byte=False, with_format=False):
	    """Load an image from a URL or a local path as a NumPy array.

	    Remote images (``http://`` / ``https://``):
	        * downloaded with ``requests``;
	        * modes L, P, PA and RGBA are converted to RGB;
	        * EXIF orientation values 3, 6 and 8 are applied by rotating;
	        * CMYK images are converted to sRGB with the ICC profiles under
	          ``resources/``;
	        * the returned bytes are the downloaded payload.

	    Local images:
	        * modes that JPEG cannot store (RGBA, P, PA, LA) are converted to
	          RGB;
	        * the returned bytes are a JPEG re-encoding of the image.

	    Args:
	        url_or_path: URL or file path (``str`` or ``pathlib.Path``).
	        with_byte: Also return the encoded image bytes.
	        with_format: With ``with_byte``, also return PIL's format name of
	            the opened image. Ignored when ``with_byte`` is False.

	    Returns:
	        ``img``, ``(img, bytes)`` or ``(img, bytes, format)``.
	    """
	    # Normalise first: `in` / `startswith` do not work on Path objects.
	    url_or_path = str(url_or_path)

	    # Test the scheme prefix rather than any "http" substring, so a local
	    # path such as "data/http_dump/a.png" is not treated as a URL.
	    if url_or_path.startswith(("http://", "https://")):
	        # NOTE(review): no timeout is set, so a stalled server blocks forever.
	        response = requests.get(url_or_path)
	        image_bytes = response.content
	        PIL_image = Image.open(io.BytesIO(image_bytes))
	        image_format = PIL_image.format

	        # `_getexif` only exists on some image plugins; treat its absence
	        # the same as "no EXIF data".
	        exif_reader = getattr(PIL_image, "_getexif", None)
	        if image_format == "GIF" or exif_reader is None:
	            img_exif = None
	        else:
	            img_exif = exif_reader()

	        if PIL_image.mode in ["L", "P", "PA", "RGBA"]:
	            PIL_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

	        if img_exif:
	            rotation_by_orientation = {3: 180, 6: 270, 8: 90}
	            for tag_id, value in img_exif.items():
	                if ExifTags.TAGS.get(tag_id) == "Orientation":
	                    angle = rotation_by_orientation.get(value)
	                    if angle is not None:
	                        PIL_image = PIL_image.rotate(angle, expand=True)
	                    break

	        if PIL_image.mode == "CMYK":
	            cmyk_profile = ImageCms.ImageCmsProfile("resources/USWebCoatedSWOP.icc")
	            srgb_profile = ImageCms.ImageCmsProfile(
	                "resources/sRGB Color Space Profile.icm"
	            )
	            PIL_image = ImageCms.profileToProfile(
	                PIL_image, cmyk_profile, srgb_profile, outputMode="RGB"
	            )
	        img = np.array(PIL_image)
	    else:
	        PIL_image = Image.open(url_or_path)
	        image_format = PIL_image.format
	        # JPEG has no alpha or palette support; convert such modes up front
	        # instead of failing inside save().
	        if PIL_image.mode in ("RGBA", "P", "PA", "LA"):
	            PIL_image = PIL_image.convert("RGB")
	        byte_arr = io.BytesIO()
	        PIL_image.save(byte_arr, format="JPEG")
	        image_bytes = byte_arr.getvalue()
	        img = np.array(PIL_image)

	    if with_byte:
	        if with_format:
	            return img, image_bytes, image_format
	        return img, image_bytes

	    return img


	def save_image(img1, img2=None, alpha=0.5, path="") -> None:
	    """Write ``img1`` (optionally blended with ``img2``) to ``path``.

	    Inputs go through ``_preprocess_image`` first. Missing parent
	    directories are created. For a ``.gif`` path the raw ``img1`` is saved
	    through PIL and the function returns True; otherwise the array is
	    written with ``cv2.imwrite`` (channel order reversed for 3-D arrays)
	    at JPEG quality 100.
	    """
	    first = _preprocess_image(_to_array(img1.copy(order="C")))
	    if img2 is not None:
	        second = _to_array(_preprocess_image(_to_array(img2.copy(order="C"))))
	        img_arr = _to_array(
	            _blend_two_images(img1=first, img2=second, alpha=alpha)
	        )
	    else:
	        img_arr = first

	    path = Path(path)
	    path.parent.mkdir(parents=True, exist_ok=True)
	    target = str(path)

	    if os.path.splitext(target)[1] == ".gif":
	        _to_pil(img1).save(target)
	        return True

	    jpeg_params = [cv2.IMWRITE_JPEG_QUALITY, 100]
	    if img_arr.ndim == 3:
	        # Reverse the channel axis before handing the array to OpenCV.
	        cv2.imwrite(filename=target, img=img_arr[:, :, ::-1], params=jpeg_params)
	    elif img_arr.ndim == 2:
	        cv2.imwrite(filename=target, img=img_arr, params=jpeg_params)


	def show_image(img1, img2=None, alpha=0.5):
	    """Display ``img1``, or its blend with ``img2``, via PIL's ``show``."""
	    first = _to_pil(_preprocess_image(_to_array(img1)))
	    if img2 is None:
	        first.show()
	        return
	    second = _to_pil(_preprocess_image(_to_array(img2)))
	    Image.blend(im1=first, im2=second, alpha=alpha).show()


	def draw_bboxes(img, bboxes: pd.DataFrame, index=False):
	    """Visualise ``bboxes`` on top of the original image.

	    Boxes are drawn on a black canvas (all fills first, then all outlines,
	    so no fill hides another box's outline) and the canvas is blended with
	    ``img``.

	    Args:
	        img: Original image array.
	        bboxes: Frame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2``, ``bbox_y2``.
	        index: Also write each row's index label above its box.

	    Returns:
	        np.ndarray: The blended visualisation.
	    """
	    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
	    draw = ImageDraw.Draw(canvas)
	    outline_color, fill_color = (0, 255, 0), (0, 100, 0)

	    # One entry per row (not per index label) so duplicate labels keep
	    # their own geometry.
	    boxes = []
	    for row in bboxes.itertuples():
	        xy = (row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2)
	        smaller = min(row.bbox_x2 - row.bbox_x1, row.bbox_y2 - row.bbox_y1)
	        boxes.append((row.Index, xy, smaller))

	    for _, xy, smaller in boxes:
	        draw.rectangle(
	            xy=xy, outline=None, fill=fill_color, width=max(1, smaller // 22)
	        )
	    for _, xy, smaller in boxes:
	        draw.rectangle(
	            xy=xy, outline=outline_color, fill=None, width=max(1, smaller // 22)
	        )

	    # `boxes` may be empty; max() over nothing would raise ValueError.
	    if index and boxes:
	        from data_utils.rendering_utils import _get_font

	        max_len = max(len(str(label)) for label, _, _ in boxes)
	        for label, xy, smaller in boxes:
	            font_size = max(10, min(40, smaller // 4))
	            draw.text(
	                xy=(xy[0], xy[1] - 4),
	                text=str(label).zfill(max_len),
	                fill="white",
	                stroke_fill="black",
	                stroke_width=2,
	                font=_get_font(lang="en", font_size=font_size),
	                anchor="ls",
	            )
	    return _blend_two_images(img1=canvas, img2=img, alpha=0.4)


	def visualize_clusters(img, bboxes, index=False):
	    """Visualise bboxes coloured by their ``cluster`` value.

	    Each box is filled with ``COLORS[cluster % len(COLORS)]`` and outlined
	    in white on a black canvas, which is then blended with ``img``. The
	    modulo keeps cluster ids beyond the palette size from raising
	    ``IndexError``; ids already in range, including negative ones, map to
	    the same colour as before.

	    Args:
	        img: Original image array.
	        bboxes: Frame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2``,
	            ``bbox_y2`` and an integer ``cluster`` column.
	        index: Also write each row's cluster id above its box.

	    Returns:
	        np.ndarray: The blended visualisation.
	    """
	    from data_utils.rendering_utils import _get_font

	    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
	    draw = ImageDraw.Draw(canvas)
	    outline_color = (255, 255, 255)

	    boxes = []
	    for row in bboxes.itertuples():
	        xy = (row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2)
	        smaller = min(row.bbox_x2 - row.bbox_x1, row.bbox_y2 - row.bbox_y1)
	        boxes.append((row.cluster, xy, smaller))

	    # Fills first, outlines second, so outlines are never painted over.
	    for cluster, xy, _ in boxes:
	        draw.rectangle(
	            xy=xy, outline=None, fill=COLORS[cluster % len(COLORS)], width=1
	        )
	    for _, xy, _ in boxes:
	        draw.rectangle(xy=xy, outline=outline_color, fill=None, width=1)

	    if index:
	        for cluster, xy, smaller in boxes:
	            font_size = max(14, min(40, smaller * 0.35))
	            draw.text(
	                xy=(xy[0], xy[1] - 4),
	                text=str(cluster),
	                fill="white",
	                stroke_fill="black",
	                stroke_width=2,
	                font=_get_font(lang="en", font_size=font_size),
	                anchor="ls",
	            )
	    return _blend_two_images(img1=canvas, img2=img, alpha=0.25)


	def draw_bboxes_and_textboxes(bboxes, img):
	    """Draw each row's bbox and tbox rectangles on a copy of ``img``.

	    The ``bbox_*`` rectangle uses colour (0, 255, 0) with thickness 4, the
	    ``tbox_*`` rectangle colour (255, 0, 0) with thickness 2.
	    """
	    canvas = img.copy(order="C")
	    for row in bboxes.itertuples():
	        rectangles = (
	            ((row.bbox_x1, row.bbox_y1), (row.bbox_x2, row.bbox_y2), (0, 255, 0), 4),
	            ((row.tbox_x1, row.tbox_y1), (row.tbox_x2, row.tbox_y2), (255, 0, 0), 2),
	        )
	        for top_left, bottom_right, color, thickness in rectangles:
	            cv2.rectangle(
	                img=canvas,
	                pt1=top_left,
	                pt2=bottom_right,
	                color=color,
	                thickness=thickness,
	            )
	    return canvas


	def draw_pseudo_character_centers(img, pccs, margin=4):
	    """Draw a small red disc at every (x, y) row of ``pccs`` and blend with ``img``.

	    ``margin`` is the disc's half-width in pixels.
	    """
	    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
	    draw = ImageDraw.Draw(canvas)
	    for center in pccs.itertuples():
	        box = (
	            center.x - margin,
	            center.y - margin,
	            center.x + margin,
	            center.y + margin,
	        )
	        draw.ellipse(xy=box, outline=(255, 0, 0), fill=(100, 0, 0))
	    return _blend_two_images(img1=canvas, img2=img, alpha=0.3)


	def _resize_image(img, w, h):
	    """Resize ``img`` to ``(w, h)``.

	    ``INTER_AREA`` is used when either target dimension is smaller than
	    the original, ``INTER_LANCZOS4`` otherwise.
	    """
	    ori_w, ori_h = _get_width_and_height(img)
	    shrinking = w < ori_w or h < ori_h
	    method = cv2.INTER_AREA if shrinking else cv2.INTER_LANCZOS4
	    return cv2.resize(src=img, dsize=(w, h), interpolation=method)


	def _resize_image_using_shorter_side(img, img_size=1530):
	    """Shrink ``img`` so its shorter side equals ``img_size``.

	    Images whose shorter side is already <= ``img_size`` are returned
	    unchanged. The aspect ratio is kept (rounded to whole pixels).
	    """
	    ori_w, ori_h = _get_width_and_height(img)
	    if min(ori_w, ori_h) <= img_size:
	        return img

	    if ori_w < ori_h:
	        target = (img_size, round(ori_h * (img_size / ori_w)))
	    else:
	        target = (round(ori_w * (img_size / ori_h)), img_size)
	    return cv2.resize(src=img, dsize=target, interpolation=cv2.INTER_AREA)


	def _resize_image_using_longer_side(img, img_size=2560):
	    """Shrink ``img`` so its longer side equals ``img_size``.

	    Images whose longer side is already <= ``img_size`` are returned
	    unchanged. The aspect ratio is kept (rounded to whole pixels).
	    """
	    ori_w, ori_h = _get_width_and_height(img)
	    if max(ori_w, ori_h) <= img_size:
	        return img

	    if ori_w < ori_h:
	        target = (round(ori_w * (img_size / ori_h)), img_size)
	    else:
	        target = (img_size, round(ori_h * (img_size / ori_w)))
	    return cv2.resize(src=img, dsize=target, interpolation=cv2.INTER_AREA)


	def _split_image_3(img, print=False):
	if img.ndim == 2:
	is_2d = True
	else:
	is_2d = False

	img = _to_3d(img)
	w, h = _get_width_and_height(img)
	if h >= w:
	if print:
	print(f"Resolution: {w}, {h} -> {w}, {h // 2}")
	img1 = img[: h // 2, :, :]
	img2 = img[h // 4 : h // 4 + h // 2, :, :]
	img3 = img[-h // 2 :, :, :]
	else:
	if print:
	print(f"Resolution: {w}, {h} -> {w // 2}, {h}")
	img1 = img[:, : w // 2, :]
	img2 = img[:, w // 2 // 2 : w // 2 // 2 + w // 2, :]
	img3 = img[:, -w // 2 :, :]
	if is_2d:
	img1 = _to_2d(img1)
	img2 = _to_2d(img2)
	img3 = _to_2d(img3)
	return img1, img2, img3


	def _split_image_2(img, print=False):
	if img.ndim == 2:
	is_2d = True
	else:
	is_2d = False

	img = _to_3d(img)
	w, h = _get_width_and_height(img)
	if h >= w:
	if print:
	print(f"Resolution: {w}, {h} -> {w}, {h // 2}")
	img1 = img[: h // 2, :, :]
	img3 = img[-h // 2 :, :, :]
	else:
	if print:
	print(f"Resolution: {w}, {h} -> {w // 2}, {h}")
	img1 = img[:, : w // 2, :]
	img3 = img[:, -w // 2 :, :]
	if is_2d:
	img1 = _to_2d(img1)
	img3 = _to_2d(img3)
	return img1, img3


	def _combine_images_3(img, img1, img2, img3):
	    """Reassemble three overlapping pieces into a canvas the size of ``img``.

	    Pieces are reduced to 2-D and placed at the same offsets
	    ``_split_image_3`` cuts at; overlaps are resolved with an element-wise
	    maximum. Returns ``None`` when all three pieces are ``None``.
	    """
	    if img1 is None and img2 is None and img3 is None:
	        return None

	    first, middle, last = _to_2d(img1), _to_2d(img2), _to_2d(img3)
	    canvas = _get_canvas_same_size_as_image(_to_2d(img), black=True)

	    w, h = _get_width_and_height(img)
	    if h >= w:
	        half, quarter = h // 2, h // 2 // 2
	        canvas[:half, :] = first
	        canvas[quarter : quarter + half, :] = np.maximum(
	            canvas[quarter : quarter + half, :], middle
	        )
	        # `-h // 2` is floor(-h / 2), matching the split's slice for odd h.
	        canvas[-h // 2 :, :] = np.maximum(canvas[-h // 2 :, :], last)
	    else:
	        half, quarter = w // 2, w // 2 // 2
	        canvas[:, :half] = first
	        canvas[:, quarter : quarter + half] = np.maximum(
	            canvas[:, quarter : quarter + half], middle
	        )
	        canvas[:, -w // 2 :] = np.maximum(canvas[:, -w // 2 :], last)
	    return canvas


	def _combine_images_2(img, img1, img2):
	    """Reassemble two halves into a canvas the size of ``img``.

	    The halves go where ``_split_image_2`` cuts them; any overlap is
	    resolved with an element-wise maximum. Returns ``None`` when both
	    halves are ``None``.
	    """
	    if img1 is None and img2 is None:
	        return None

	    canvas = _get_canvas_same_size_as_image(img, black=True)
	    w, h = _get_width_and_height(img)
	    if h >= w:
	        canvas[: h // 2, :] = img1
	        # `-h // 2` is floor(-h / 2), matching the split's slice for odd h.
	        tail = canvas[-h // 2 :, :]
	        canvas[-h // 2 :, :] = np.maximum(tail, img2)
	    else:
	        canvas[:, : w // 2] = img1
	        tail = canvas[:, -w // 2 :]
	        canvas[:, -w // 2 :] = np.maximum(tail, img2)
	    return canvas


	def _rotate_90_degrees(img, counterclockwise=False):
	    """Rotate ``img`` by 90 degrees, clockwise unless ``counterclockwise`` is set."""
	    if counterclockwise:
	        code = cv2.ROTATE_90_COUNTERCLOCKWISE
	    else:
	        code = cv2.ROTATE_90_CLOCKWISE
	    return cv2.rotate(src=img, rotateCode=code)


	def save_image_patches(img, bboxes, dir):
	    """Crop every bbox out of ``img`` and save it as ``<index:04>.jpg`` under ``dir``.

	    Patches taller than they are wide are rotated 90 degrees clockwise
	    before saving.
	    """
	    out_dir = Path(dir)
	    for row in bboxes.itertuples():
	        patch = _crop_image(
	            img=img, l=row.bbox_x1, t=row.bbox_y1, r=row.bbox_x2, b=row.bbox_y2
	        )
	        is_portrait = (row.bbox_y2 - row.bbox_y1) > (row.bbox_x2 - row.bbox_x1)
	        if is_portrait:
	            patch = _rotate_90_degrees(patch, counterclockwise=False)

	        save_image(img1=patch, path=out_dir / f"{str(row.Index).zfill(4)}.jpg")


	def get_minimum_area_bounding_rectangle(mask):
	    """Return ``(x1, y1, x2, y2)`` of the non-zero area of ``mask``.

	    Coordinates are the first/last occupied column and row (inclusive).
	    An all-zero mask yields ``(0, 0, 0, 0)``.
	    """
	    occupied = _to_2d(mask.astype("uint8")) != 0
	    cols = np.where(occupied.any(axis=0))[0]
	    rows = np.where(occupied.any(axis=1))[0]
	    if len(cols) == 0 or len(rows) == 0:
	        return 0, 0, 0, 0
	    return int(cols[0]), int(rows[0]), int(cols[-1]), int(rows[-1])


	def get_minimum_area_bounding_rectangle2(mask, l, t, r, b):
	    """Tighten the box (l, t, r, b) to the occupied rows/columns of ``mask``.

	    Each edge moves to the nearest occupied column/row strictly inside the
	    given box on that side. An edge for which the lookup fails keeps its
	    original value.
	    """
	    occupied = _to_2d(mask.astype("uint8")) != 0
	    cols = np.where(occupied.any(axis=0))[0]
	    rows = np.where(occupied.any(axis=1))[0]

	    def _attempt(compute, fallback):
	        # Best-effort lookup: any failure keeps the caller's edge.
	        try:
	            return compute()
	        except Exception:
	            return fallback

	    new_l = _attempt(lambda: cols[np.where(l < cols)][0], l)
	    new_t = _attempt(lambda: rows[np.where(t < rows)][0], t)
	    new_r = _attempt(lambda: cols[np.where(cols < r)][-1], r)
	    new_b = _attempt(lambda: rows[np.where(rows < b)][-1], b)
	    return new_l, new_t, new_r, new_b


	def _downsample_image(img):
	    """Resize ``img`` to half its width and height (integer division)."""
	    width, height = _get_width_and_height(img)
	    return _resize_image(img, w=width // 2, h=height // 2)


	def _upsample_image(img):
	    """Resize ``img`` to twice its width and height."""
	    width, height = _get_width_and_height(img)
	    return _resize_image(img, w=width * 2, h=height * 2)


	def _get_pseudo_image(img, mask, invert=False):
	    """Gather the pixels of ``img`` under ``mask`` into a ``(1, N, 3)`` strip.

	    With ``invert`` the pixels outside the mask are collected instead.
	    """
	    selector = _invert_image(mask) if invert else mask
	    ys, xs = np.nonzero(_to_2d(selector))
	    return img[ys, xs, :].reshape((1, -1, 3))


	def resize_coordinates_and_image_to_fit_to_maximum_pixel_counts(
	    bboxes, img, max_pixel_counts=1530
	):
	    """Shrink ``img`` and its boxes so neither side exceeds ``max_pixel_counts``.

	    Nothing changes when the image already fits. Otherwise the
	    ``xmin``/``ymin``/``xmax``/``ymax`` columns of ``bboxes`` are scaled
	    in place (truncated to int) and the image is resized with Lanczos
	    interpolation.

	    Returns:
	        ``(bboxes, img)``.
	    """
	    w, h = _get_width_and_height(img)
	    ratio = min(max_pixel_counts / h, max_pixel_counts / w)
	    if not ratio < 1:
	        return bboxes, img

	    def _scale(value):
	        return int(value * ratio)

	    for col in ("xmin", "ymin", "xmax", "ymax"):
	        bboxes[col] = bboxes[col].apply(_scale)

	    resized = cv2.resize(
	        src=img,
	        dsize=(int(w * ratio), int(h * ratio)),
	        interpolation=cv2.INTER_LANCZOS4,
	    )
	    return bboxes, resized


	def get_image_patches_3(img, text_stroke_mask, mask1, mask2):
	    """Cut ``img``, ``mask1`` and ``mask2`` into patches around text strokes.

	    Patch boxes are the bounding boxes of the 4-connected components of
	    the splitting mask built from ``text_stroke_mask``.

	    Returns:
	        list of dicts with keys ``xmin``, ``ymin``, ``xmax``, ``ymax``,
	        ``img``, ``mask1`` and ``mask2``.
	    """
	    splitting_mask = get_splitting_mask(text_stroke_mask)
	    _, _, stats, _ = cv2.connectedComponentsWithStats(
	        image=_to_2d(splitting_mask), connectivity=4
	    )

	    patches = []
	    # Row 0 of `stats` is the background component.
	    for xmin, ymin, width, height, _ in stats[1:, :]:
	        xmax, ymax = xmin + width, ymin + height
	        box = dict(l=xmin, t=ymin, r=xmax, b=ymax)
	        patches.append(
	            {
	                "xmin": xmin,
	                "ymin": ymin,
	                "xmax": xmax,
	                "ymax": ymax,
	                "img": _crop_image(img=img, **box),
	                "mask1": _crop_image(img=mask1, **box),
	                "mask2": _crop_image(img=mask2, **box),
	            }
	        )
	    return patches


	def get_image_patches_2(img, mask1, mask2):
	    """Cut ``img``, ``mask1`` and ``mask2`` into patches around ``mask1`` regions.

	    Patch boxes are the bounding boxes of the 4-connected components of
	    the splitting mask built from ``mask1``.

	    Returns:
	        list of dicts with keys ``x1``, ``y1``, ``x2``, ``y2``, ``img``,
	        ``mask1`` and ``mask2``.
	    """
	    splitting_mask = get_splitting_mask(mask1)
	    _, _, stats, _ = cv2.connectedComponentsWithStats(
	        image=_to_2d(splitting_mask), connectivity=4
	    )

	    patches = []
	    # Row 0 of `stats` is the background component.
	    for x1, y1, w, h, _ in stats[1:, :]:
	        x2, y2 = x1 + w, y1 + h
	        box = dict(l=x1, t=y1, r=x2, b=y2)
	        patches.append(
	            {
	                "x1": x1,
	                "y1": y1,
	                "x2": x2,
	                "y2": y2,
	                "img": _crop_image(img=img, **box),
	                "mask1": _crop_image(img=mask1, **box),
	                "mask2": _crop_image(img=mask2, **box),
	            }
	        )
	    return patches


	def get_splitting_mask(text_stroke_mask):
	    """Return ``text_stroke_mask`` dilated by ``_dilate_mask`` with ``kernel_size=200``.

	    The patch-extraction helpers label the connected components of this
	    dilated mask to decide where to cut an image.
	    """
	    return _dilate_mask(text_stroke_mask, kernel_size=200)


	def enhance_sharpness(img):
	    """Increase the sharpness of ``img``.

	    Three approaches were considered (a sharpening filter, an unsharp mask
	    and PIL's sharpness enhancer). Of the three, PIL alters the original
	    colors the least, so it is the one used here.

	    Args:
	        img (np.ndarray): Input image.

	    Returns:
	        np.ndarray: Sharpened image.
	    """
	    # Alternatives that were tried and left out:
	    #   1. Sharpening filter: convolve the V channel (RGB -> HSV) with
	    #      [[0, -1, 0], [-1, 5, -1], [0, -1, 0]] and convert back to RGB.
	    #   2. Unsharp mask: on the Y channel (RGB -> YCrCb) compute
	    #      clip(2 * Y - GaussianBlur(Y, sigma=2.0), 0, 255) and convert back.
	    sharpener = ImageEnhance.Sharpness(_to_pil(img))
	    return _to_array(sharpener.enhance(2))


	def mask2point(mask):
	    """Return the indices of every mask pixel whose value is exactly 255.

	    Args:
	        mask: Mask array, e.g. (H, W, 3) holding 0 or 255; only the first
	            channel is inspected (via ``_to_2d``).

	    Returns:
	        np.ndarray: Array of shape (N, 2) with one ``(row, col)`` pair per
	        matching pixel.
	    """
	    plane = _to_2d(mask)
	    return np.argwhere(plane == 255)


	def get_corner(corner_coords):
	    """Estimate the four outer corners of a cloud of ``(y, x)`` points.

	    The points are split into four quadrants around their centroid. In each
	    quadrant the outermost x and the outermost y are taken independently,
	    so a returned corner is not necessarily one of the input points.

	    Args:
	        corner_coords: Array-like of shape (N, 2); each row is ``(y, x)``.

	    Returns:
	        tuple: ``(lt, rt, rb, lb)``, each an ``(x, y)`` pair.

	    Raises:
	        ValueError: If the input is not a non-empty (N, 2) array, or if one
	            of the quadrants around the centroid contains no point (the
	            corner for that quadrant cannot be estimated).
	    """
	    points = np.asarray(corner_coords)
	    if points.ndim != 2 or points.shape[1] != 2 or points.shape[0] == 0:
	        raise ValueError(
	            f"corner_coords must be a non-empty (N, 2) array, got shape {points.shape}"
	        )

	    ys, xs = points[:, 0], points[:, 1]
	    cy, cx = points.mean(axis=0)
	    is_top, is_bottom = ys < cy, ys >= cy
	    is_left, is_right = xs < cx, xs >= cx

	    quadrants = {
	        "left-top": is_top & is_left,
	        "right-top": is_top & is_right,
	        "right-bottom": is_bottom & is_right,
	        "left-bottom": is_bottom & is_left,
	    }
	    # Fail early with a readable message instead of NumPy's
	    # "zero-size array to reduction operation" error.
	    missing = [name for name, selected in quadrants.items() if not selected.any()]
	    if missing:
	        raise ValueError(
	            "no corner candidates in quadrant(s): " + ", ".join(missing)
	        )

	    sel = quadrants["left-top"]
	    lt = xs[sel].min(), ys[sel].min()
	    sel = quadrants["right-top"]
	    rt = xs[sel].max(), ys[sel].min()
	    sel = quadrants["right-bottom"]
	    rb = xs[sel].max(), ys[sel].max()
	    sel = quadrants["left-bottom"]
	    lb = xs[sel].min(), ys[sel].max()

	    return lt, rt, rb, lb


	def get_dst_mask(mask):
	    """Return the L2 distance transform of ``mask`` as a 3-channel 8-bit image.

	    Only the first channel of ``mask`` is used (via ``_to_2d``). The
	    distances are min-max normalized to the 0-255 range and the single
	    channel is stacked three times by ``_to_3d``.
	    """
	    distances = cv2.distanceTransform(_to_2d(mask), cv2.DIST_L2, 5)
	    # Normalize the distance values into the 0-255 range.
	    scaled = cv2.normalize(
	        distances, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
	    )
	    return _to_3d(scaled)


	def unwarp(img, src, dst):
	    """Apply the perspective transform that maps ``src`` points onto ``dst``.

	    Args:
	        img: Image array; the output keeps its width and height.
	        src: Four source points, passed to ``cv2.getPerspectiveTransform``.
	        dst: Four destination points, passed to ``cv2.getPerspectiveTransform``.

	    Returns:
	        tuple: ``(warped, M)`` -- the warped image and the transform matrix.
	        The inverse matrix is not computed.
	    """
	    height, width = img.shape[:2]
	    matrix = cv2.getPerspectiveTransform(src, dst)
	    # Warp with bilinear interpolation into a canvas of the original size.
	    warped = cv2.warpPerspective(
	        img, matrix, (width, height), flags=cv2.INTER_LINEAR
	    )
	    return warped, matrix


	def perspective_correction(img, src=None, vis=False, method: PC_TYPE = PC_TYPE.HARRIS):
	    """Warp ``img`` so that four source corners land on the image corners.

	    Args:
	        img: Image array of shape (H, W, C) with values in 0~255.
	        src: Optional corner points in the order
	            ``[[ltx, lty], [rtx, rty], [rbx, rby], [lbx, lby]]``. Any
	            array-like is accepted and converted to float32. When ``None``,
	            the corners are estimated from ``img`` using ``method``.
	        vis: Only used when ``src`` is ``None``. When true, the detected
	            corner candidates are drawn on a copy of ``img`` and written to
	            ``vis_corner.png``.
	        method: Detector used to find corner candidates when ``src`` is
	            ``None``.

	    Returns:
	        The warped image, same width and height as ``img``.

	    Raises:
	        ValueError: If ``src`` is ``None`` and ``method`` is not a
	            ``PC_TYPE``, or if the detector finds no corner candidates.

	    Note:
	        The result is always written to ``cv_result.png`` in the current
	        working directory before it is returned.
	    """
	    if src is None:
	        gray = _to_grayscale(img)

	        if not isinstance(method, PC_TYPE):
	            raise ValueError(
	                f"Invalid method: {method}. Expected one of {list(PC_TYPE)}."
	            )

	        # Every branch produces an (N, 2) array of (y, x) candidates.
	        if method == PC_TYPE.HARRIS:
	            response = cv2.cornerHarris(gray, 5, 3, 0.04)  # (H, W) corner score
	            threshold = 0.005 * response.max()
	            corner_coords = np.argwhere(response > threshold)

	        elif method == PC_TYPE.EDGES_CONTOURS:
	            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
	            edges = cv2.Canny(blurred, 50, 150)
	            contours, _ = cv2.findContours(
	                edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
	            )
	            if len(contours) > 0:
	                # Contour points are (x, y); flip the last axis to (y, x).
	                corner_coords = np.concatenate(contours).reshape(-1, 2)[..., ::-1]
	            else:
	                corner_coords = np.empty((0, 2))

	        elif method == PC_TYPE.GFTT:
	            corners = cv2.goodFeaturesToTrack(
	                gray, 0, 0.01, 5, blockSize=3, useHarrisDetector=True, k=0.03
	            )
	            # OpenCV returns None (not an empty array) when nothing is found.
	            if corners is None:
	                corner_coords = np.empty((0, 2))
	            else:
	                corner_coords = corners.reshape(-1, 2)[..., ::-1]

	        else:
	            if method == PC_TYPE.FAST:
	                detector = cv2.FastFeatureDetector_create(50)
	            else:  # PC_TYPE.KAZE
	                detector = cv2.KAZE_create()
	            keypoints = detector.detect(gray)
	            # reshape keeps the (N, 2) layout even when no keypoint exists.
	            corner_coords = np.array(
	                [[kp.pt[1], kp.pt[0]] for kp in keypoints]
	            ).reshape(-1, 2)

	        if vis:
	            view_img = img.copy()
	            for y, x in corner_coords:
	                cv2.circle(view_img, (int(x), int(y)), 3, (255, 0, 0), 2)
	            save_image(view_img, path="vis_corner.png")

	        if len(corner_coords) == 0:
	            raise ValueError(
	                f"No corner candidates detected with {method}; pass `src` explicitly."
	            )

	        src = get_corner(corner_coords)

	    # cv2.getPerspectiveTransform needs float32 point arrays, so normalize
	    # both detected corners and caller-supplied lists here.
	    src = np.asarray(src, dtype=np.float32)

	    height, width = img.shape[:2]
	    dst = np.float32(
	        [
	            (0, 0),
	            (width - 1, 0),
	            (width - 1, height - 1),
	            (0, height - 1),
	        ]
	    )

	    result, _ = unwarp(img, src, dst)
	    save_image(result, path="cv_result.png")
	    return result


	if __name__ == "__main__":
	    # Manual smoke test: load a sample image from a URL and run the default
	    # perspective correction with the corner visualization enabled.
	    sample_url = "https://d2reotjpatzlok.cloudfront.net/qr-place/item/QR_20240726_2441_2_LZ1ZFCT38HN7PPCEZR8H.jpg"
	    img, _img_bytes, _img_format = load_image(
	        sample_url, with_byte=True, with_format=True
	    )
	    perspective_correction(img, vis=True)