Spaces:
Running
Running
# References | |
# https://sashamaps.net/docs/resources/20-colors/ | |
import numpy as np | |
import cv2 | |
from scipy import ndimage as ndi | |
from PIL import Image, ImageDraw, ImageCms, ExifTags, ImageEnhance | |
import requests | |
from pathlib import Path | |
import pandas as pd | |
from scipy.sparse import coo_matrix | |
from skimage.feature import peak_local_max | |
from skimage.morphology import local_maxima | |
from skimage.segmentation import watershed | |
from moviepy.video.io.bindings import mplfig_to_npimage | |
import io | |
import os | |
from enum import Enum | |
# Palette of 20 visually distinct colours, stored as (R, G, B) triples.
# Used to paint segmentation labels and cluster ids; see the URL in the
# "References" comment at the top of the file for the source of the list.
COLORS = (
    (230, 25, 75), (60, 180, 75), (255, 255, 25), (0, 130, 200),
    (245, 130, 48), (145, 30, 180), (70, 240, 250), (240, 50, 230),
    (210, 255, 60), (250, 190, 212), (0, 128, 128), (220, 190, 255),
    (170, 110, 40), (255, 250, 200), (128, 0, 0), (170, 255, 195),
    (128, 128, 0), (255, 215, 180), (0, 0, 128), (128, 128, 128),
)
class PC_TYPE(Enum):
    """Closed set of "PC" method identifiers.

    NOTE(review): the member names look like keypoint/corner detector
    families; the enum is not used in the visible part of the file, so
    confirm the intended meaning against its callers.
    """

    HARRIS = 1
    EDGES_CONTOURS = 2
    GFTT = 3
    FAST = 4
    KAZE = 5
def _to_2d(img): | |
# it use just first channel. if you want rgb2gray, use _to_grayscale | |
if img.ndim == 3: | |
return img[:, :, 0] | |
else: | |
return img | |
def _to_3d(img): | |
if img.ndim == 2: | |
return np.dstack([img, img, img]) | |
else: | |
return img | |
def _to_byte(img: Image, format) -> bytes: | |
# BytesIO is a file-like buffer stored in memory | |
imgByteArr = io.BytesIO() | |
# image.save expects a file-like as a argument | |
img.save(imgByteArr, format=format) | |
# Turn the BytesIO object back into a bytes object | |
imgByteArr = imgByteArr.getvalue() | |
return imgByteArr | |
def _get_width_and_height(img): | |
if img.ndim == 2: | |
h, w = img.shape | |
else: | |
h, w, _ = img.shape | |
return w, h | |
def _get_resolution(img):
    """Return the number of pixels (width * height) of ``img``."""
    width, height = _get_width_and_height(img)
    return width * height
def _to_pil(img):
    """Return ``img`` as a PIL image.

    PIL images pass through untouched; anything else is handed to
    ``Image.fromarray`` and interpreted as RGB data.
    """
    if isinstance(img, Image.Image):
        return img
    return Image.fromarray(img, mode="RGB")
def _to_array(img): | |
img = np.array(img) | |
return img | |
def _bool_to_uint8(img): | |
uint8 = img.astype("uint8") | |
if ( | |
np.array_equal(np.unique(uint8), np.array([0, 1])) | |
or np.array_equal(np.unique(uint8), np.array([0])) | |
or np.array_equal(np.unique(uint8), np.array([1])) | |
): | |
return uint8 * 255 | |
else: | |
return uint8 | |
def _figure_to_array(fig):
    """Render a matplotlib figure into an image array via moviepy."""
    return mplfig_to_npimage(fig)
def _preprocess_image(img):
    """Turn an arbitrary array into something that can be displayed.

    * int32 arrays are treated as segmentation maps and repainted.
    * bool arrays become 0/255 uint8.
    * 2-D arrays whose values are only 0 and/or 255 are expanded to three
      channels; any other 2-D array is passed through the jet colormap.
    """
    if img.dtype == "int32":
        img = _repaint_segmentation_map(img)
    if img.dtype == "bool":
        img = img.astype("uint8") * 255
    if img.ndim != 2:
        return img

    levels = np.unique(img)
    is_binary_mask = any(
        np.array_equal(levels, np.array(pattern))
        for pattern in ([0, 255], [0], [255])
    )
    if is_binary_mask:
        return _to_3d(img)
    return _apply_jet_colormap(img)
def _blend_two_images(img1, img2, alpha=0.5):
    """Alpha-blend two images with ``PIL.Image.blend`` and return an array.

    ``alpha`` is forwarded unchanged to ``Image.blend``.
    """
    first, second = _to_pil(img1), _to_pil(img2)
    return _to_array(Image.blend(im1=first, im2=second, alpha=alpha))
def _repaint_segmentation_map(seg_map):
    """Colour a label map using ``COLORS``.

    Each label is mapped to palette entry ``label % len(COLORS)``; pixels
    whose label is 0 are painted black. The three colour planes are
    stacked with ``np.dstack``.
    """
    channels = [
        _get_canvas_same_size_as_image(seg_map, black=True) for _ in range(3)
    ]
    # Shift by one so that the slots line up with enumerate(..., start=1).
    color_slot = seg_map % len(COLORS) + 1
    for slot, rgb in enumerate(COLORS, start=1):
        selected = color_slot == slot
        for channel, value in zip(channels, rgb):
            channel[selected] = value

    # Label 0 shares a slot with multiples of len(COLORS); reset it to black.
    background = seg_map == 0
    for channel in channels:
        channel[background] = 0
    return np.dstack(channels)
def _get_canvas_same_size_as_image(img, black=False): | |
if black: | |
return np.zeros_like(img).astype("uint8") | |
else: | |
return (np.ones_like(img) * 255).astype("uint8") | |
def _get_canvas(w, h, black=False): | |
if black: | |
return np.zeros((h, w, 3)).astype("uint8") | |
else: | |
return (np.ones((h, w, 3)) * 255).astype("uint8") | |
def _invert_image(mask):
    """Return the bitwise complement of ``mask`` after casting it to uint8."""
    as_uint8 = mask.astype("uint8")
    return cv2.bitwise_not(as_uint8)
def _to_grayscale(img):
    """Convert an RGB image to grayscale with ``cv2.COLOR_RGB2GRAY``."""
    return cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)
def _erode_mask(mask, kernel_size=3):
    """Erode ``mask`` with a square ``kernel_size`` x ``kernel_size`` kernel.

    Boolean masks are first converted to 0/255 uint8.
    """
    if mask.dtype == "bool":
        mask = mask.astype("uint8") * 255
    square = cv2.getStructuringElement(
        shape=cv2.MORPH_RECT, ksize=(kernel_size, kernel_size)
    )
    return cv2.erode(src=mask, kernel=square)
def _dilate_mask(mask, kernel_size=3):
    """Dilate ``mask`` with a square ``kernel_size`` x ``kernel_size`` kernel.

    A ``kernel_size`` of 0 returns the mask untouched. Boolean masks are
    converted to 0/255 uint8 before dilation.
    """
    if kernel_size == 0:
        return mask
    if mask.dtype == "bool":
        mask = mask.astype("uint8") * 255
    square = cv2.getStructuringElement(
        shape=cv2.MORPH_RECT, ksize=(kernel_size, kernel_size)
    )
    return cv2.dilate(src=mask, kernel=square)
def _gaussian_blur_mask(mask, kernel_size=5):
    """Grow a mask by Gaussian-blurring it and re-binarising.

    Every pixel that is non-zero after the blur becomes 255, all others 0.
    """
    ksize = (kernel_size, kernel_size)
    blurred = cv2.GaussianBlur(src=mask, ksize=ksize, sigmaX=0)
    # Any non-zero response counts as foreground.
    return (blurred != 0).astype("uint8") * 255
def _blur(img, v=0.04):
    """Gaussian-blur ``img`` with a kernel proportional to its shorter side.

    The kernel size is ``round(min(w, h) * v)`` forced to the next odd
    value (``// 2 * 2 + 1``). The blur is applied to a C-ordered copy.
    """
    w, h = _get_width_and_height(img)
    odd_size = round(min(w, h) * v) // 2 * 2 + 1
    return cv2.GaussianBlur(
        src=img.copy(order="C"), ksize=(odd_size, odd_size), sigmaX=0
    )
def _get_adaptive_thresholded_image(img, invert=False, block_size=3):
    """Binarise an RGB image with mean adaptive thresholding.

    Args:
        img: RGB image; converted to grayscale first.
        invert: Use ``THRESH_BINARY_INV`` instead of ``THRESH_BINARY``.
        block_size: Neighbourhood size passed as ``blockSize``.

    Returns:
        The thresholded single-channel image (max value 255, ``C=0``).
    """
    threshold_type = cv2.THRESH_BINARY_INV if invert else cv2.THRESH_BINARY
    grayscale = cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)
    return cv2.adaptiveThreshold(
        src=grayscale,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
        thresholdType=threshold_type,
        blockSize=block_size,
        C=0,
    )
def _make_segmentation_map_rectangle(seg_map): | |
seg_map_copied = seg_map.copy(order="C") | |
for idx in range(1, np.max(seg_map_copied) + 1): | |
seg_map_sub = seg_map_copied == idx | |
nonzero_x = np.where((seg_map_sub != 0).any(axis=0))[0] | |
nonzero_y = np.where((seg_map_sub != 0).any(axis=1))[0] | |
if nonzero_x.size != 0 and nonzero_y.size != 0: | |
seg_map_copied[ | |
nonzero_y[0] : nonzero_y[-1], nonzero_x[0] : nonzero_x[-1] | |
] = idx | |
return seg_map_copied | |
def _apply_jet_colormap(img):
    """Apply OpenCV's JET colormap to the inverted image (``255 - img``)."""
    inverted = 255 - img
    return cv2.applyColorMap(src=inverted, colormap=cv2.COLORMAP_JET)
def _reverse_jet_colormap(img):
    """Map a jet-coloured image back to the gray levels that produced it.

    A lookup table is built by colouring all 256 gray levels with
    ``_apply_jet_colormap``; every pixel of ``img`` is then looked up in
    that table. A colour that is not in the table raises ``KeyError``.
    """
    gray_levels = np.arange(256, dtype=np.uint8)
    palette = _apply_jet_colormap(gray_levels).reshape(256, 3)
    gray_by_color = {
        tuple(color): gray for color, gray in zip(palette, gray_levels)
    }

    def _lookup(pixel):
        return gray_by_color[tuple(pixel)]

    return np.apply_along_axis(_lookup, axis=2, arr=img)
def _get_pixel_counts(arr, sort=False, include_zero=False): | |
unique, cnts = np.unique(arr, return_counts=True) | |
idx2cnt = dict(zip(unique, cnts)) | |
if not include_zero: | |
if 0 in idx2cnt: | |
idx2cnt.pop(0) | |
if not sort: | |
return idx2cnt | |
else: | |
return dict(sorted(idx2cnt.items(), key=lambda x: x[1], reverse=True)) | |
def _combine_masks(masks):
    """Merge several masks by taking the element-wise maximum.

    Each mask is expanded to three channels before merging. The running
    result starts as a black canvas shaped like the first mask.
    """
    combined = _get_canvas_same_size_as_image(img=masks[0], black=True)
    for current in masks:
        combined = np.maximum(_to_3d(combined), _to_3d(current))
    return combined
def _get_local_maxima_coordinates(region_score_map, region_seg_map=None, th=150):
    """Find local maxima of a score map as ``(x, y)`` coordinates.

    Args:
        region_score_map: Score map searched for peaks.
        region_seg_map: Label map restricting the search. When omitted it
            is derived by thresholding the score map at ``th`` and
            labelling 4-connected components.
        th: Threshold used only when ``region_seg_map`` is not given.

    Returns:
        An ``(N, 2)`` array with one ``(x, y)`` row per peak (at most 24
        peaks per label, at least 5 pixels apart).
    """
    # A threshold of 150 works better when `src_lang="ja"`.
    if region_seg_map is None:
        _, region_mask = cv2.threshold(
            src=region_score_map, thresh=th, maxval=255, type=cv2.THRESH_BINARY
        )
        _, region_seg_map = cv2.connectedComponents(
            image=region_mask, connectivity=4
        )
    peaks_yx = peak_local_max(
        image=region_score_map,
        min_distance=5,
        labels=region_seg_map,
        num_peaks_per_label=24,
    )
    # peak_local_max yields (row, col); flip the columns to get (x, y).
    return peaks_yx[:, ::-1]
def _get_local_maxima_array(region_score_map, region_seg_map=None, th=150):
    """Return a boolean image that is True at the score map's local maxima.

    Args:
        region_score_map: Score map searched for peaks.
        region_seg_map: Optional label map restricting the search; it is
            forwarded to ``_get_local_maxima_coordinates`` (previously it
            was silently ignored).
        th: Threshold forwarded to ``_get_local_maxima_coordinates``.

    Returns:
        Boolean array with the same shape as ``region_score_map``.
    """
    peaks_xy = _get_local_maxima_coordinates(
        region_score_map, region_seg_map=region_seg_map, th=th
    )
    cols = peaks_xy[:, 0]
    rows = peaks_xy[:, 1]
    values = np.ones(len(peaks_xy), dtype=int)
    # Scatter the peak positions into a dense array of the map's shape.
    return (
        coo_matrix((values, (rows, cols)), shape=region_score_map.shape)
        .toarray()
        .astype("bool")
    )
def _mask_image(img, mask, invert=False):
    """Keep only the part of ``img`` that lies inside ``mask``.

    Args:
        img (PIL image or np.ndarray): Image to be masked.
        mask (PIL image or np.ndarray): Mask of shape (H, W) or (H, W, C);
            for a 3-D mask only the first channel is used.
        invert (bool, optional): Use the inverted mask instead.

    Returns:
        np.ndarray: ``img`` with everything outside the mask set to zero.
    """
    pixels = _to_array(img)
    selector = _to_2d(_to_array(mask))
    if invert:
        selector = _invert_image(selector)
    selector = selector.astype("uint8")
    return cv2.bitwise_and(src1=pixels, src2=pixels, mask=selector)
def _ignore_small_regions_in_mask(mask, area_thresh=10):
    """Remove connected regions smaller than ``area_thresh`` pixels.

    Args:
        mask: Mask image; only the first channel of a 3-D mask is used.
        area_thresh: Minimum area (in pixels) a 4-connected region must
            have to be kept.

    Returns:
        A three-channel uint8 mask (0/255) with only the kept regions.
    """
    mask = _to_2d(mask)
    _, seg_map, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype("uint8"), connectivity=4
    )
    # Row 0 of ``stats`` is the background label; skip it explicitly rather
    # than dropping "the first label that passes the threshold", which
    # would discard a real region whenever the background is small.
    areas = stats[1:, cv2.CC_STAT_AREA]
    kept_labels = np.flatnonzero(areas >= area_thresh) + 1
    kept = np.isin(seg_map, kept_labels)
    return _to_3d(kept.astype("uint8") * 255)
def _crop_image(img, l, t, r, b):
    """Crop ``img`` to the box (l, t, r, b), clamped to the image bounds.

    The left/top edges are clamped to 0 and the right/bottom edges to the
    image width/height; all four are truncated to ``int``.
    """
    w, h = _get_width_and_height(img)
    top, bottom = int(max(0, t)), int(min(h, b))
    left, right = int(max(0, l)), int(min(w, r))
    return img[top:bottom, left:right, ...]
def _bboxes_to_mask(img, bboxes):
    """Rasterise bounding boxes into a three-channel mask shaped like ``img``.

    ``bboxes`` must provide the columns ``bbox_x1``, ``bbox_y1``,
    ``bbox_x2`` and ``bbox_y2``; the area of each box is set to 255 on a
    black canvas.
    """
    canvas = _get_canvas_same_size_as_image(img=img, black=True)
    for box in bboxes.itertuples():
        canvas[box.bbox_y1 : box.bbox_y2, box.bbox_x1 : box.bbox_x2] = 255
    return _to_3d(canvas)
def _apply_watershed(mask, region_score_map, th=150):
    """Segment ``mask`` with a watershed seeded at the score map's peaks.

    The local maxima of ``region_score_map`` (found with threshold ``th``)
    are labelled as 4-connected components and used as markers; the
    watershed runs on the negated score map, restricted to ``mask``.
    """
    peak_image = _get_local_maxima_array(region_score_map, th=th)
    _, seeds = cv2.connectedComponents(
        image=peak_image.astype("uint8"), connectivity=4
    )
    return watershed(image=-region_score_map, markers=seeds, mask=_to_2d(mask))
def _perform_watershed(score_map, score_thresh=80):
    """Watershed-segment a score map using its strong local maxima as seeds.

    Scores below 190 are zeroed on a copy before the local maxima are
    searched (borders excluded); the maxima are labelled as 8-connected
    components. The watershed runs on the negated score map inside the
    region where the score exceeds ``score_thresh``.
    """
    # NOTE(review): negating an unsigned score map wraps around - confirm
    # the dtype callers pass in.
    peaks_only = score_map.copy()
    peaks_only[peaks_only < 190] = 0
    seeds = local_maxima(image=peaks_only, allow_borders=False)
    _, seeds = cv2.connectedComponents(image=seeds.astype("int8"), connectivity=8)
    _, region_mask = cv2.threshold(
        src=score_map, thresh=score_thresh, maxval=255, type=cv2.THRESH_BINARY
    )
    return watershed(image=-score_map, markers=seeds, mask=_to_2d(region_mask))
def _get_region_segmentation_map(region_score_map, region_thresh=30):
    """Build a label map from a region score map.

    The score map is binarised at ``region_thresh`` and the resulting mask
    is split with ``_apply_watershed``.
    """
    _, binary_mask = cv2.threshold(
        src=region_score_map,
        thresh=region_thresh,
        maxval=255,
        type=cv2.THRESH_BINARY,
    )
    return _apply_watershed(region_score_map=region_score_map, mask=binary_mask)
def _combine_two_segmentation_maps(seg_map1, seg_map2):
    """Merge two label maps into one and blank out the most common label.

    Non-zero labels of ``seg_map2`` are shifted by the number of distinct
    values in ``seg_map1`` minus one, then added to ``seg_map1``. The label
    with the highest pixel count in the merged map (0 included) is set
    to 0.
    """
    label_offset = len(np.unique(seg_map1)) - 1
    shifted_second = _mask_image(img=seg_map2 + label_offset, mask=(seg_map2 != 0))
    merged = seg_map1 + shifted_second
    counts = _get_pixel_counts(merged, sort=True, include_zero=True)
    dominant_label = list(counts)[0]
    return _mask_image(img=merged, mask=(merged != dominant_label))
def _get_image_segmentation_map(img, region_score_map=None, block_size=3):
    """Segment an image into connected regions via adaptive thresholding.

    Args:
        img: RGB image.
        region_score_map: Optional score map. When given, the image is
            first restricted to the area where the score exceeds 20,
            dilated with a 16 px kernel.
        block_size: Block size for the adaptive threshold.

    Returns:
        A label map combining the components of the thresholded image and
        of its inverse (see ``_combine_two_segmentation_maps``).
    """
    if region_score_map is None:
        img_masked = img
    else:
        _, region_mask = cv2.threshold(
            src=region_score_map, thresh=20, maxval=255, type=cv2.THRESH_BINARY
        )
        # ``_dilate_mask`` names its first parameter ``mask``; the former
        # ``img=`` keyword raised TypeError.
        region_mask = _dilate_mask(mask=region_mask, kernel_size=16)
        img_masked = _mask_image(img=img, mask=region_mask)

    seg_maps = []
    for invert in (False, True):
        thresholded = _get_adaptive_thresholded_image(
            img=img_masked, invert=invert, block_size=block_size
        )
        _, labels = cv2.connectedComponents(image=thresholded, connectivity=4)
        seg_maps.append(labels)
    return _combine_two_segmentation_maps(seg_map1=seg_maps[0], seg_map2=seg_maps[1])
def _get_segmentation_map_overlapping_mask(seg_map, mask, overlap_thresh=0.6):
    """Select the segments that lie mostly inside ``mask``.

    For every non-zero label the share of its pixels covered by the
    non-zero part of ``mask`` is computed; labels whose share exceeds
    ``overlap_thresh`` are kept.

    Returns:
        A three-channel uint8 mask (0/255) of the kept segments.
    """
    total_counts = _get_pixel_counts(seg_map, sort=True, include_zero=False)
    covered_seg_map = _mask_image(img=seg_map, mask=(mask != 0))
    covered_counts = _get_pixel_counts(
        covered_seg_map, sort=False, include_zero=False
    )

    table = pd.DataFrame.from_dict(
        total_counts, orient="index", columns=["total_pixel_count"]
    )
    table["overlap_pixel_count"] = table.apply(
        lambda row: covered_counts.get(row.name, 0), axis=1
    )
    table["ratio"] = table["overlap_pixel_count"] / table["total_pixel_count"]

    kept_labels = table[table["ratio"] > overlap_thresh].index.tolist()
    selected = np.isin(seg_map, kept_labels).astype("uint8")
    return _to_3d(selected * 255)
def _split_segmentation_map(seg_map, pccs):
    """Split a label map by whether a label contains an "inside" centre.

    Args:
        seg_map: Label map.
        pccs: DataFrame with integer ``x``/``y`` columns and a boolean
            ``inside`` column.

    Returns:
        ``(seg_map1, seg_map2)``: the labels hit by at least one centre
        flagged ``inside``, and all remaining labels.
    """
    inside_centers = pccs[pccs["inside"]]
    hit_labels = inside_centers.apply(
        lambda center: seg_map[center["y"], center["x"]], axis=1
    ).values.tolist()
    belongs = np.isin(seg_map, hit_labels)
    hit_part = _mask_image(img=seg_map, mask=belongs)
    rest_part = _mask_image(img=seg_map, mask=~belongs)
    return hit_part, rest_part
def _segmentation_map_to_mask(seg_map):
    """Turn a label map into a three-channel 0/255 mask of its non-zero labels."""
    foreground = (seg_map != 0).astype("uint8") * 255
    return _to_3d(foreground)
def _get_pseudo_character_centers_from_mask(mask, bboxes: pd.DataFrame = None):
    """Compute the centre coordinates of each label (character) in a mask.

    Args:
        mask: Mask image; 8-connected components of its first channel are
            treated as characters.
        bboxes: Optional DataFrame with ``bbox_x1``, ``bbox_y1``,
            ``bbox_x2`` and ``bbox_y2`` columns. ``None`` (the default) is
            handled like an empty frame.

    Returns:
        DataFrame with integer ``x``/``y`` centroid columns and an
        ``inside`` column that is True when the centre lies strictly
        inside at least one box (always True when no boxes are given).
    """
    num_labels, _, _, centroids = cv2.connectedComponentsWithStats(
        image=_to_2d(mask), connectivity=8
    )
    # Label 0 is the background, so start at 1.
    center_coords = [(int(cx), int(cy)) for cx, cy in centroids[1:num_labels]]
    pccs = pd.DataFrame(center_coords, columns=["x", "y"])

    # Guard against the ``None`` default, which used to crash on ``.empty``.
    if bboxes is None or bboxes.empty:
        pccs["inside"] = True
        return pccs

    # Vectorised point-in-box test: rows are centres, columns are boxes.
    xs = pccs["x"].values[:, None]
    ys = pccs["y"].values[:, None]
    pccs["inside"] = (
        (xs > bboxes["bbox_x1"].values)
        & (xs < bboxes["bbox_x2"].values)
        & (ys > bboxes["bbox_y1"].values)
        & (ys < bboxes["bbox_y2"].values)
    ).any(axis=1)
    return pccs
def _get_pseudo_character_centers(
    region_score_map, region_seg_map=None, bboxes=pd.DataFrame()
):
    """Locate pseudo character centres as local maxima of a score map.

    Args:
        region_score_map: Score map searched for peaks.
        region_seg_map: Optional label map forwarded to
            ``_get_local_maxima_coordinates``.
        bboxes: DataFrame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2`` and
            ``bbox_y2`` columns; may be empty.

    Returns:
        DataFrame with ``x``/``y`` columns and an ``inside`` column that is
        True when the centre lies strictly inside at least one box (always
        True when ``bboxes`` is empty).
    """
    peaks_xy = _get_local_maxima_coordinates(
        region_score_map, region_seg_map=region_seg_map
    )
    pccs = pd.DataFrame(peaks_xy, columns=["x", "y"])
    if bboxes.empty:
        pccs["inside"] = True
        return pccs

    # Vectorised point-in-box test: rows are centres, columns are boxes.
    xs = pccs["x"].values[:, None]
    ys = pccs["y"].values[:, None]
    within_x = (xs > bboxes["bbox_x1"].values) & (xs < bboxes["bbox_x2"].values)
    within_y = (ys > bboxes["bbox_y1"].values) & (ys < bboxes["bbox_y2"].values)
    pccs["inside"] = (within_x & within_y).any(axis=1)
    return pccs
def _convert_region_score_map_to_region_mask(region_score_map, region_score_thresh=170):
    """Keep only the score-map regions that contain a strong peak.

    The score map is binarised at a fixed threshold of 30 and split into
    4-connected components; a component is kept when its maximum score is
    at least ``region_score_thresh``.

    Returns:
        A three-channel uint8 mask (0/255) of the kept components.
    """
    _, region_mask = cv2.threshold(
        src=region_score_map, thresh=30, maxval=255, type=cv2.THRESH_BINARY
    )
    new_mask = _get_canvas_same_size_as_image(img=region_mask, black=True)
    n_labels, seg_map, _, _ = cv2.connectedComponentsWithStats(
        image=_to_2d(region_mask), connectivity=4
    )
    for label in range(1, n_labels):
        component = seg_map == label
        if np.max(region_score_map[component]) < region_score_thresh:
            continue
        new_mask[component] = 255
    return _to_3d(new_mask)
def _split_mask(mask, region_score_map=None, bboxes=pd.DataFrame(), th=30):
    """Split a mask into two kinds of regions.

    The two results are the area that must be erased during inpainting and
    the area that must be restored. ``mask1`` and ``mask2`` may overlap.

    How it works: ``region_score_map`` (or, when it is not given, the map
    returned by ``get_dst_mask``) is segmented with ``_apply_watershed``,
    which seeds a watershed at the map's local maxima. Pseudo character
    centres (pccs) are then found with ``peak_local_max`` (skimage) on the
    score map and that segmentation, and each centre is checked against the
    bounding boxes. Labels containing a centre that lies inside a box form
    ``seg_map1``; all other labels form ``seg_map2``.

    Args:
        mask (np.ndarray): (H, W, 3) mask with values 0 or 255.
        region_score_map (np.ndarray): Region score map, the output of
            CRAFT; a heat map that emphasises character centres.
        bboxes (pd.DataFrame): Box coordinates (bbox_x1, bbox_y1, bbox_x2,
            bbox_y2).
        th: Threshold forwarded to ``_apply_watershed``.

    Returns:
        np.ndarray: ``mask1``, the part to erase, and ``mask2``, the part
        to restore.
    """
    if region_score_map is None:
        score_map = _to_2d(get_dst_mask(mask))
    else:
        score_map = region_score_map

    seg_map = _apply_watershed(mask=mask, region_score_map=score_map, th=th)
    pccs = _get_pseudo_character_centers(
        region_score_map=score_map, region_seg_map=seg_map, bboxes=bboxes
    )
    box_mask = _bboxes_to_mask(seg_map, bboxes)

    seg_map1, seg_map2 = _split_segmentation_map(seg_map=seg_map, pccs=pccs)
    mask1 = _segmentation_map_to_mask(seg_map1)
    mask2 = _segmentation_map_to_mask(seg_map2)
    # Parts of mask1 that fall outside every box are added to mask2.
    outside_boxes = _to_3d(_mask_image(mask1, box_mask, invert=True))
    mask2 = _combine_masks([mask2, outside_boxes])
    return mask1, mask2
def get_word_segmentation_map(region_score_map, affinity_score_map):
    """Label word regions from region and affinity score maps.

    Both maps are binarised at 70, the two masks are added together and
    the sum is split into 4-connected components.
    """

    def _binarize(score_map):
        _, binary = cv2.threshold(
            src=score_map, thresh=70, maxval=255, type=cv2.THRESH_BINARY
        )
        return binary

    word_mask = _binarize(region_score_map) + _binarize(affinity_score_map)
    _, segmentation_map_word = cv2.connectedComponents(
        image=word_mask, connectivity=4
    )
    return segmentation_map_word
def get_line_segmentation_map(line_score_map):
    """Label line regions: binarise the score map at 130, then label
    4-connected components."""
    _, binary_lines = cv2.threshold(
        src=line_score_map, thresh=130, maxval=255, type=cv2.THRESH_BINARY
    )
    _, labels = cv2.connectedComponents(image=binary_lines, connectivity=4)
    return labels
def _get_3d_block_segmentation_map(img, bboxes): | |
segmentation_map_block = np.zeros( | |
shape=(img.shape[0], img.shape[1], len(bboxes) + 1) | |
) | |
for idx, (xmin, ymin, xmax, ymax) in enumerate( | |
bboxes[["xmin", "ymin", "xmax", "ymax"]].values, start=1 | |
): | |
segmentation_map_block[ymin:ymax, xmin:xmax, idx] = 255 | |
return segmentation_map_block | |
def compare_images(img1, img2, flag=cv2.CMP_EQ):
    """Compare two images element-wise with ``cv2.compare``.

    With the default flag, positions where the images are equal become 255
    and all others 0. See the ``cv2.CMP_*`` constants for other flags
    (``CMP_EQ`` marks equal elements, ``CMP_NE`` marks differing ones).
    """
    comparison = cv2.compare(img1, img2, flag)
    return comparison
def convert_webp_png_get_data(img: np.ndarray):
    """Round-trip an image through a temporary PNG and return its bytes.

    The image is converted to RGB, written to a temporary ``.png`` file and
    read back with ``load_image``; the byte string that ``load_image``
    produces for that file is returned.

    A unique temporary file is used (instead of a fixed ``temp.png`` in the
    working directory) so concurrent calls cannot clobber each other, and
    the file is removed even if saving or loading fails.
    """
    import tempfile

    rgb_image = _to_pil(img).convert("RGB")
    handle, temp_path = tempfile.mkstemp(suffix=".png")
    os.close(handle)
    try:
        rgb_image.save(temp_path)
        _, data, _ = load_image(temp_path, with_byte=True, with_format=True)
    finally:
        os.remove(temp_path)
    return data
def add_water_mark(original_img, water_mark_img_path):
    """Paste a watermark image onto the centre of ``original_img``.

    Args:
        original_img: PIL image or NumPy array. A PIL image is modified in
            place by the paste.
        water_mark_img_path: Path of the watermark image (opened as RGBA;
            its alpha channel is used as the paste mask).

    Returns:
        The watermarked RGB image, as an array when an array was passed in
        and as a PIL image otherwise.
    """
    return_np = isinstance(original_img, np.ndarray)
    if return_np:
        original_img = _to_pil(original_img)

    watermark = Image.open(water_mark_img_path).convert("RGBA")
    width_o, height_o = original_img.size
    width_wm, height_wm = watermark.size

    # Only when the watermark is larger than the original in some
    # dimension: shrink it, keeping its aspect ratio, so that it fits.
    if width_wm > width_o or height_wm > height_o:
        ratio = min(width_o / width_wm, height_o / height_wm)
        scaled_size = (int(width_wm * ratio), int(height_wm * ratio))
        watermark = watermark.resize(scaled_size, Image.Resampling.LANCZOS)
        width_wm, height_wm = watermark.size

    # Centre the (possibly resized) watermark.
    position = ((width_o - width_wm) // 2, (height_o - height_wm) // 2)
    original_img.paste(watermark, position, watermark)
    rgb_image = original_img.convert("RGB")
    return _to_array(rgb_image) if return_np else rgb_image
def load_image(url_or_path, with_byte=False, with_format=False):
    """Load an image from a URL or a local path as a NumPy array.

    Anything whose string form contains "http" is downloaded with
    ``requests``; everything else is opened as a local file.

    URL branch: images in mode L/P/PA/RGBA are converted to RGB, the EXIF
    orientation (values 3, 6, 8) is applied, and CMYK images are converted
    to sRGB using the ICC profiles under ``resources/``. The returned bytes
    are the raw downloaded content.

    Local branch: RGBA images are converted to RGB and the returned bytes
    are the image re-encoded as JPEG.

    Args:
        url_or_path: URL or file path (``str`` or path-like).
        with_byte: Also return the image bytes.
        with_format: Also return the PIL format name; only honoured when
            ``with_byte`` is true.

    Returns:
        ``img``, ``(img, image_bytes)`` or ``(img, image_bytes, format)``.
    """
    # Convert first so that path-like objects survive the substring test.
    url_or_path = str(url_or_path)

    if "http" in url_or_path:
        response = requests.get(url_or_path)
        image_bytes = response.content
        PIL_image = Image.open(io.BytesIO(image_bytes))
        image_format = PIL_image.format

        # ``_getexif`` only exists on some image plugins; treat a missing
        # method like "no EXIF data" instead of raising AttributeError.
        read_exif = getattr(PIL_image, "_getexif", None)
        if image_format == "GIF" or read_exif is None:
            img_exif = None
        else:
            img_exif = read_exif()

        if PIL_image.mode in ["L", "P", "PA", "RGBA"]:
            PIL_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        if img_exif:
            rotation_by_orientation = {3: 180, 6: 270, 8: 90}
            for tag_id, tag_value in img_exif.items():
                if ExifTags.TAGS.get(tag_id) != "Orientation":
                    continue
                angle = rotation_by_orientation.get(tag_value)
                if angle is not None:
                    PIL_image = PIL_image.rotate(angle, expand=True)
                break

        if PIL_image.mode == "CMYK":
            cmyk_profile = ImageCms.ImageCmsProfile("resources/USWebCoatedSWOP.icc")
            srgb_profile = ImageCms.ImageCmsProfile(
                "resources/sRGB Color Space Profile.icm"
            )
            PIL_image = ImageCms.profileToProfile(
                PIL_image, cmyk_profile, srgb_profile, outputMode="RGB"
            )
        img = np.array(PIL_image)
    else:
        PIL_image = Image.open(url_or_path)
        image_format = PIL_image.format
        byte_arr = io.BytesIO()
        if PIL_image.mode == "RGBA":
            PIL_image = PIL_image.convert("RGB")
        PIL_image.save(byte_arr, format="JPEG")
        image_bytes = byte_arr.getvalue()
        img = np.array(PIL_image)

    if with_byte:
        if with_format:
            return img, image_bytes, image_format
        return img, image_bytes
    return img
def save_image(img1, img2=None, alpha=0.5, path="") -> "bool | None":
    """Save an image, optionally blended with a second one, to ``path``.

    Both inputs are copied and run through ``_preprocess_image``. Missing
    parent directories are created.

    Args:
        img1: Array to save (must support ``copy(order="C")``).
        img2: Optional second array blended on top of ``img1``.
        alpha: Blend factor forwarded to ``_blend_two_images``.
        path: Destination file path.

    Returns:
        ``True`` when the path ends in ".gif" (in which case the original,
        unprocessed ``img1`` is saved through PIL); ``None`` otherwise,
        where the image is written with ``cv2.imwrite``.
    """
    first = _preprocess_image(_to_array(img1.copy(order="C")))
    if img2 is None:
        img_arr = first
    else:
        second = _to_array(_preprocess_image(_to_array(img2.copy(order="C"))))
        img_arr = _to_array(_blend_two_images(img1=first, img2=second, alpha=alpha))

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    if path.suffix == ".gif":
        _to_pil(img1).save(str(path))
        return True

    if img_arr.ndim == 3:
        # Reverse the channel axis: RGB in memory, BGR for OpenCV.
        pixels = img_arr[:, :, ::-1]
    elif img_arr.ndim == 2:
        pixels = img_arr
    else:
        return None
    cv2.imwrite(
        filename=str(path), img=pixels, params=[cv2.IMWRITE_JPEG_QUALITY, 100]
    )
    return None
def show_image(img1, img2=None, alpha=0.5):
    """Display an image, optionally blended with a second one.

    Both inputs go through ``_preprocess_image`` before being shown with
    PIL's ``show``.
    """
    first = _to_pil(_preprocess_image(_to_array(img1)))
    if img2 is None:
        first.show()
        return
    second = _to_pil(_preprocess_image(_to_array(img2)))
    Image.blend(im1=first, im2=second, alpha=alpha).show()
def draw_bboxes(img, bboxes: pd.DataFrame, index=False):
    """Visualise bounding boxes on top of the original image.

    Args:
        img: Original image.
        bboxes: DataFrame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2`` and
            ``bbox_y2`` columns.
        index: Also draw each row's index (zero-padded) above its box.

    Returns:
        The image blended with the drawn overlay.
    """
    outline_color = (0, 255, 0)
    fill_color = (0, 100, 0)
    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
    draw = ImageDraw.Draw(canvas)

    def _corners(row):
        return (row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2)

    def _shorter_side(row):
        return min(row.bbox_x2 - row.bbox_x1, row.bbox_y2 - row.bbox_y1)

    # Line width scales with the box size, but is at least 1.
    thickness_by_index = {
        row.Index: max(1, _shorter_side(row) // 22) for row in bboxes.itertuples()
    }

    # Fill every box first, then draw all outlines on top of the fills.
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=_corners(row),
            outline=None,
            fill=fill_color,
            width=thickness_by_index[row.Index],
        )
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=_corners(row),
            outline=outline_color,
            fill=None,
            width=thickness_by_index[row.Index],
        )

    if index:
        from data_utils.rendering_utils import _get_font

        max_len = max(map(len, map(str, bboxes.index)))
        for row in bboxes.itertuples():
            font_size = max(10, min(40, _shorter_side(row) // 4))
            draw.text(
                xy=(row.bbox_x1, row.bbox_y1 - 4),
                text=str(row.Index).zfill(max_len),
                fill="white",
                stroke_fill="black",
                stroke_width=2,
                font=_get_font(lang="en", font_size=font_size),
                anchor="ls",
            )
    return _blend_two_images(img1=canvas, img2=img, alpha=0.4)
def visualize_clusters(img, bboxes, index=False):
    """Visualise bounding boxes coloured by their cluster id.

    Args:
        img: Original image.
        bboxes: DataFrame with ``bbox_x1``, ``bbox_y1``, ``bbox_x2``,
            ``bbox_y2`` and an integer ``cluster`` column.
        index: Also draw each box's cluster id above it.

    Returns:
        The image blended with the drawn overlay.
    """
    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
    draw = ImageDraw.Draw(canvas)

    # Wrap around the palette so cluster ids beyond its length reuse colours
    # instead of raising IndexError.
    fill_by_index = {
        row.Index: COLORS[row.cluster % len(COLORS)] for row in bboxes.itertuples()
    }

    # Fill every box first, then draw all outlines on top of the fills.
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
            outline=None,
            fill=fill_by_index[row.Index],
            width=1,
        )
    for row in bboxes.itertuples():
        draw.rectangle(
            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
            outline=(255, 255, 255),
            fill=None,
            width=1,
        )

    if index:
        # Imported lazily so the font module is only needed for labels.
        from data_utils.rendering_utils import _get_font

        for row in bboxes.itertuples():
            h = row.bbox_y2 - row.bbox_y1
            w = row.bbox_x2 - row.bbox_x1
            font_size = max(14, min(40, min(w, h) * 0.35))
            draw.text(
                xy=(row.bbox_x1, row.bbox_y1 - 4),
                text=str(row.cluster),
                fill="white",
                stroke_fill="black",
                stroke_width=2,
                font=_get_font(lang="en", font_size=font_size),
                anchor="ls",
            )
    return _blend_two_images(img1=canvas, img2=img, alpha=0.25)
def draw_bboxes_and_textboxes(bboxes, img):
    """Draw each row's bbox and tbox rectangles on a copy of ``img``.

    The ``bbox_*`` rectangle is drawn with colour (0, 255, 0) and thickness
    4, the ``tbox_*`` rectangle with colour (255, 0, 0) and thickness 2.
    """
    canvas = img.copy(order="C")
    for row in bboxes.itertuples():
        rectangles = (
            ((row.bbox_x1, row.bbox_y1), (row.bbox_x2, row.bbox_y2), (0, 255, 0), 4),
            ((row.tbox_x1, row.tbox_y1), (row.tbox_x2, row.tbox_y2), (255, 0, 0), 2),
        )
        for top_left, bottom_right, color, thickness in rectangles:
            cv2.rectangle(
                img=canvas,
                pt1=top_left,
                pt2=bottom_right,
                color=color,
                thickness=thickness,
            )
    return canvas
def draw_pseudo_character_centers(img, pccs, margin=4):
    """Draw a small disc at every pseudo character centre.

    Args:
        img: Image the overlay is blended with.
        pccs: DataFrame with ``x`` and ``y`` columns.
        margin: Half-size of each disc's bounding box, in pixels.

    Returns:
        The image blended with the drawn overlay.
    """
    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
    draw = ImageDraw.Draw(canvas)
    for center in pccs.itertuples():
        bounding_box = (
            center.x - margin,
            center.y - margin,
            center.x + margin,
            center.y + margin,
        )
        draw.ellipse(xy=bounding_box, outline=(255, 0, 0), fill=(100, 0, 0))
    return _blend_two_images(img1=canvas, img2=img, alpha=0.3)
def _resize_image(img, w, h):
    """Resize ``img`` to ``w`` x ``h``.

    ``INTER_AREA`` is used when either dimension shrinks, otherwise
    ``INTER_LANCZOS4``.
    """
    ori_w, ori_h = _get_width_and_height(img)
    shrinking = w < ori_w or h < ori_h
    method = cv2.INTER_AREA if shrinking else cv2.INTER_LANCZOS4
    return cv2.resize(src=img, dsize=(w, h), interpolation=method)
def _resize_image_using_shorter_side(img, img_size=1530):
    """Shrink ``img`` so that its shorter side equals ``img_size``.

    Images whose shorter side is already at most ``img_size`` are returned
    unchanged. The aspect ratio is preserved (up to rounding).
    """
    ori_w, ori_h = _get_width_and_height(img)
    if min(ori_w, ori_h) <= img_size:
        return img
    if ori_w < ori_h:
        target = (img_size, round(ori_h * (img_size / ori_w)))
    else:
        target = (round(ori_w * (img_size / ori_h)), img_size)
    return cv2.resize(src=img, dsize=target, interpolation=cv2.INTER_AREA)
def _resize_image_using_longer_side(img, img_size=2560):
    """Shrink ``img`` so that its longer side equals ``img_size``.

    Images whose longer side is already at most ``img_size`` are returned
    unchanged. The aspect ratio is preserved (up to rounding).
    """
    ori_w, ori_h = _get_width_and_height(img)
    if max(ori_w, ori_h) <= img_size:
        return img
    if ori_w < ori_h:
        target = (round(ori_w * (img_size / ori_h)), img_size)
    else:
        target = (img_size, round(ori_h * (img_size / ori_w)))
    return cv2.resize(src=img, dsize=target, interpolation=cv2.INTER_AREA)
def _split_image_3(img, print=False): | |
if img.ndim == 2: | |
is_2d = True | |
else: | |
is_2d = False | |
img = _to_3d(img) | |
w, h = _get_width_and_height(img) | |
if h >= w: | |
if print: | |
print(f"Resolution: {w}, {h} -> {w}, {h // 2}") | |
img1 = img[: h // 2, :, :] | |
img2 = img[h // 4 : h // 4 + h // 2, :, :] | |
img3 = img[-h // 2 :, :, :] | |
else: | |
if print: | |
print(f"Resolution: {w}, {h} -> {w // 2}, {h}") | |
img1 = img[:, : w // 2, :] | |
img2 = img[:, w // 2 // 2 : w // 2 // 2 + w // 2, :] | |
img3 = img[:, -w // 2 :, :] | |
if is_2d: | |
img1 = _to_2d(img1) | |
img2 = _to_2d(img2) | |
img3 = _to_2d(img3) | |
return img1, img2, img3 | |
def _split_image_2(img, print=False): | |
if img.ndim == 2: | |
is_2d = True | |
else: | |
is_2d = False | |
img = _to_3d(img) | |
w, h = _get_width_and_height(img) | |
if h >= w: | |
if print: | |
print(f"Resolution: {w}, {h} -> {w}, {h // 2}") | |
img1 = img[: h // 2, :, :] | |
img3 = img[-h // 2 :, :, :] | |
else: | |
if print: | |
print(f"Resolution: {w}, {h} -> {w // 2}, {h}") | |
img1 = img[:, : w // 2, :] | |
img3 = img[:, -w // 2 :, :] | |
if is_2d: | |
img1 = _to_2d(img1) | |
img3 = _to_2d(img3) | |
return img1, img3 | |
def _combine_images_3(img, img1, img2, img3):
    """Reassemble three overlapping pieces into one 2-D canvas.

    The pieces are placed at the first half, the half starting at one
    quarter, and the last half of the longer axis of ``img``; where pieces
    overlap the element-wise maximum is kept.

    Returns:
        The combined 2-D uint8 canvas, or ``None`` when all three pieces
        are ``None``.
    """
    if img1 is None and img2 is None and img3 is None:
        return None

    img1, img2, img3 = _to_2d(img1), _to_2d(img2), _to_2d(img3)
    canvas = _get_canvas_same_size_as_image(_to_2d(img), black=True)
    w, h = _get_width_and_height(img)

    if h >= w:
        half, quarter, tail = h // 2, h // 2 // 2, -h // 2
        canvas[:half, :] = img1
        middle = canvas[quarter : quarter + half, :]
        canvas[quarter : quarter + half, :] = np.maximum(middle, img2)
        canvas[tail:, :] = np.maximum(canvas[tail:, :], img3)
    else:
        half, quarter, tail = w // 2, w // 2 // 2, -w // 2
        canvas[:, :half] = img1
        middle = canvas[:, quarter : quarter + half]
        canvas[:, quarter : quarter + half] = np.maximum(middle, img2)
        canvas[:, tail:] = np.maximum(canvas[:, tail:], img3)
    return canvas
def _combine_images_2(img, img1, img2):
    """Reassemble two pieces into a canvas shaped like ``img``.

    ``img1`` fills the first half of the longer axis and ``img2`` the last
    half; where they overlap the element-wise maximum is kept.

    Returns:
        The combined uint8 canvas, or ``None`` when both pieces are
        ``None``.
    """
    if img1 is None and img2 is None:
        return None

    canvas = _get_canvas_same_size_as_image(img, black=True)
    w, h = _get_width_and_height(img)
    if h >= w:
        tail = -h // 2
        canvas[: h // 2, :] = img1
        canvas[tail:, :] = np.maximum(canvas[tail:, :], img2)
    else:
        tail = -w // 2
        canvas[:, : w // 2] = img1
        canvas[:, tail:] = np.maximum(canvas[:, tail:], img2)
    return canvas
def _rotate_90_degrees(img, counterclockwise=False):
    """Rotate ``img`` by 90 degrees, clockwise unless stated otherwise."""
    if counterclockwise:
        rotation = cv2.ROTATE_90_COUNTERCLOCKWISE
    else:
        rotation = cv2.ROTATE_90_CLOCKWISE
    return cv2.rotate(src=img, rotateCode=rotation)
def save_image_patches(img, bboxes, dir):
    """Crop every bounding box out of ``img`` and save it as a JPEG.

    Patches that are taller than wide are rotated 90 degrees clockwise
    first. Files are named after the row index, zero-padded to four digits,
    inside ``dir``.
    """
    for row in bboxes.itertuples():
        left, top, right, bottom = (
            row.bbox_x1,
            row.bbox_y1,
            row.bbox_x2,
            row.bbox_y2,
        )
        patch = _crop_image(img=img, l=left, t=top, r=right, b=bottom)
        if bottom - top > right - left:
            patch = _rotate_90_degrees(patch, counterclockwise=False)
        save_image(img1=patch, path=Path(dir) / f"{str(row.Index).zfill(4)}.jpg")
def get_minimum_area_bounding_rectangle(mask):
    """Return the bounding box of the non-zero area of ``mask``.

    Returns:
        ``(x1, y1, x2, y2)`` as ints, where ``x2``/``y2`` are the indices
        of the last occupied column/row (inclusive); ``(0, 0, 0, 0)`` when
        the mask is empty.
    """
    occupied = _to_2d(mask.astype("uint8")) != 0
    xs = np.where(occupied.any(axis=0))[0]
    ys = np.where(occupied.any(axis=1))[0]
    if len(xs) == 0 or len(ys) == 0:
        return 0, 0, 0, 0
    return int(xs[0]), int(ys[0]), int(xs[-1]), int(ys[-1])
def get_minimum_area_bounding_rectangle2(mask, l, t, r, b):
    """Shrink the box ``(l, t, r, b)`` towards the non-zero pixels of ``mask``.

    Each side is moved independently:

    * ``new_l`` / ``new_t`` become the first non-zero column / row whose index
      is strictly greater than ``l`` / ``t``;
    * ``new_r`` / ``new_b`` become the last non-zero column / row whose index
      is strictly smaller than ``r`` / ``b``.

    A side for which no such column / row exists keeps its original value.

    The previous implementation wrapped every lookup in a blanket
    ``except Exception``; the only expected failure is "no matching index",
    which is now tested explicitly so that genuine errors (for example a
    non-numeric bound) are no longer silently swallowed.

    Args:
        mask: Mask array; it is cast to ``uint8`` and reduced with ``_to_2d``.
        l, t, r, b: Current left, top, right and bottom bounds.

    Returns:
        ``(new_l, new_t, new_r, new_b)``. Sides that were moved are NumPy
        integer scalars; sides that were kept are returned as passed in.
    """
    occupied = _to_2d(mask.astype("uint8")) != 0
    nonzero_x = np.where(occupied.any(axis=0))[0]
    nonzero_y = np.where(occupied.any(axis=1))[0]

    def _pick(candidates, position, fallback):
        # ``position`` is 0 (first match) or -1 (last match).
        return candidates[position] if candidates.size else fallback

    new_l = _pick(nonzero_x[nonzero_x > l], 0, l)
    new_t = _pick(nonzero_y[nonzero_y > t], 0, t)
    new_r = _pick(nonzero_x[nonzero_x < r], -1, r)
    new_b = _pick(nonzero_y[nonzero_y < b], -1, b)
    return new_l, new_t, new_r, new_b
def _downsample_image(img):
    """Return ``img`` resized to half its width and height (floor division)."""
    w, h = _get_width_and_height(img)
    return _resize_image(img, w=w // 2, h=h // 2)
def _upsample_image(img):
    """Return ``img`` resized to twice its width and height."""
    w, h = _get_width_and_height(img)
    return _resize_image(img, w=w * 2, h=h * 2)
def _get_pseudo_image(img, mask, invert=False):
    """Collect the pixels of ``img`` selected by ``mask`` into a 1 x N x 3 strip.

    Args:
        img: Image indexed as ``img[row, col, :]``; the result is reshaped to
            three channels.
        mask: Mask whose non-zero positions (after ``_to_2d``) select pixels.
        invert: When truthy, the mask is passed through ``_invert_image``
            first, so the complementary pixels are selected.

    Returns:
        Array of shape ``(1, N, 3)`` holding the selected pixels.
    """
    selector = _invert_image(mask) if invert else mask
    ys, xs = np.nonzero(_to_2d(selector))
    selected = img[ys, xs, :]
    return selected.reshape((1, -1, 3))
def resize_coordinates_and_image_to_fit_to_maximum_pixel_counts(
    bboxes, img, max_pixel_counts=1530
):
    """Shrink ``img`` and its boxes so neither side exceeds ``max_pixel_counts``.

    The scale factor is ``min(max_pixel_counts / h, max_pixel_counts / w)``,
    i.e. the limit applies to each side length, not to the pixel area.
    Nothing is changed when that factor is 1 or larger (images are never
    enlarged).

    Args:
        bboxes: DataFrame with ``xmin``, ``ymin``, ``xmax`` and ``ymax``
            columns. NOTE: these columns are overwritten in place with the
            scaled, ``int``-truncated coordinates.
        img: Image array.
        max_pixel_counts: Maximum allowed side length in pixels.

    Returns:
        ``(bboxes, img)`` with the (possibly) scaled boxes and image.
    """
    w, h = _get_width_and_height(img)
    ratio = min(max_pixel_counts / h, max_pixel_counts / w)

    if ratio < 1:

        def _scale(value):
            return int(value * ratio)

        for col in ("xmin", "ymin", "xmax", "ymax"):
            bboxes[col] = bboxes[col].apply(_scale)

        target_size = (int(w * ratio), int(h * ratio))
        img = cv2.resize(
            src=img, dsize=target_size, interpolation=cv2.INTER_LANCZOS4
        )
    return bboxes, img
def get_image_patches_3(img, text_stroke_mask, mask1, mask2):
    """Split ``img`` and two masks into patches around text-stroke regions.

    The text-stroke mask is turned into a splitting mask with
    ``get_splitting_mask``; every 4-connected component of that mask yields
    one patch, cropped from ``img``, ``mask1`` and ``mask2`` with the
    component's bounding box.

    Returns:
        A list of dicts with keys ``xmin``, ``ymin``, ``xmax``, ``ymax``
        (``xmax = xmin + width``, ``ymax = ymin + height``), ``img``,
        ``mask1`` and ``mask2``.
    """
    splitting_mask = get_splitting_mask(text_stroke_mask)
    component_stats = cv2.connectedComponentsWithStats(
        image=_to_2d(splitting_mask), connectivity=4
    )[2]

    patches = []
    # Row 0 is skipped: OpenCV reports the background label there.
    for left, top, width, height, _area in component_stats[1:, :]:
        right = left + width
        bottom = top + height
        box = {"l": left, "t": top, "r": right, "b": bottom}
        patches.append(
            {
                "xmin": left,
                "ymin": top,
                "xmax": right,
                "ymax": bottom,
                "img": _crop_image(img=img, **box),
                "mask1": _crop_image(img=mask1, **box),
                "mask2": _crop_image(img=mask2, **box),
            }
        )
    return patches
def get_image_patches_2(img, mask1, mask2):
    """Split ``img`` and two masks into patches around the regions of ``mask1``.

    ``mask1`` is turned into a splitting mask with ``get_splitting_mask``;
    every 4-connected component of that mask yields one patch, cropped from
    ``img``, ``mask1`` and ``mask2`` with the component's bounding box.

    Returns:
        A list of dicts with keys ``x1``, ``y1``, ``x2``, ``y2``
        (``x2 = x1 + width``, ``y2 = y1 + height``), ``img``, ``mask1`` and
        ``mask2``.
    """
    splitting_mask = get_splitting_mask(mask1)
    component_stats = cv2.connectedComponentsWithStats(
        image=_to_2d(splitting_mask), connectivity=4
    )[2]

    patches = []
    # Row 0 is skipped: OpenCV reports the background label there.
    for left, top, width, height, _area in component_stats[1:, :]:
        right = left + width
        bottom = top + height
        box = {"l": left, "t": top, "r": right, "b": bottom}
        patches.append(
            {
                "x1": left,
                "y1": top,
                "x2": right,
                "y2": bottom,
                "img": _crop_image(img=img, **box),
                "mask1": _crop_image(img=mask1, **box),
                "mask2": _crop_image(img=mask2, **box),
            }
        )
    return patches
def get_splitting_mask(text_stroke_mask):
    """Return ``text_stroke_mask`` dilated by ``_dilate_mask`` with ``kernel_size=200``.

    The result is used by the ``get_image_patches_*`` functions, which cut
    one patch per connected component of this mask.
    """
    return _dilate_mask(text_stroke_mask, kernel_size=200)
def enhance_sharpness(img):
    """Increase the sharpness of an image.

    Three approaches were considered (a sharpening filter, an unsharp mask
    and PIL's sharpness enhancer). Of the three, PIL changes the colors of
    the original the least, so that is the one used here.

    Args:
        img (np.ndarray): Input image.

    Returns:
        np.ndarray: Sharpened image.
    """
    # Alternative 1 (not used): 3x3 sharpening filter on the HSV value channel.
    # sharpening_k = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    # hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    # sharpened_v = cv2.filter2D(hsv[..., 2], -1, sharpening_k)
    # hsv[..., 2] = sharpened_v
    # img_patch2 = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
    #
    # Alternative 2 (not used): unsharp mask on the Y channel of YCrCb.
    # src_ycrcb = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
    # src_f = src_ycrcb[:, :, 0].astype(np.float32)
    # blr = cv2.GaussianBlur(src_f, (0, 0), 2.0)
    # src_ycrcb[:, :, 0] = np.clip(2. * src_f - blr, 0, 255).astype(np.uint8)
    # img_patch3 = cv2.cvtColor(src_ycrcb, cv2.COLOR_YCrCb2RGB)
    enhancer = ImageEnhance.Sharpness(_to_pil(img))
    sharpened = enhancer.enhance(2)
    return _to_array(sharpened)
def mask2point(mask):
    """Return the coordinates of all pixels equal to 255 in ``mask``.

    Only the first channel is inspected when ``mask`` is 3-D (see ``_to_2d``).

    Returns:
        ``(N, 2)`` integer array whose rows are ``(row, col)`` indices.
    """
    return np.argwhere(_to_2d(mask) == 255)
def get_corner(corner_coords):
    """Estimate four outer corners from a cloud of candidate points.

    The points are split into four quadrants around their centroid. For each
    quadrant the returned corner is the outermost ``x`` and outermost ``y``
    found in that quadrant, taken independently, so a returned corner is a
    corner of the quadrant's bounding box and need not be one of the input
    points.

    Args:
        corner_coords: Array-like of shape ``(N, 2)`` whose rows are
            ``(y, x)`` points.

    Returns:
        ``(lt, rt, rb, lb)``: left-top, right-top, right-bottom and
        left-bottom corners, each an ``(x, y)`` tuple.

    Raises:
        ValueError: If ``corner_coords`` is empty or not of shape ``(N, 2)``,
            or if any of the four quadrants contains no point (previously
            this surfaced as an opaque NumPy "zero-size array" error).
    """
    coords = np.asarray(corner_coords)
    if coords.ndim != 2 or coords.shape[1] != 2 or coords.shape[0] == 0:
        raise ValueError(
            "corner_coords must be a non-empty (N, 2) array of (y, x) points, "
            f"got shape {coords.shape}"
        )

    cy, cx = np.mean(coords, axis=0)
    ys, xs = coords[:, 0], coords[:, 1]
    upper, lower = ys < cy, ys >= cy
    left, right = xs < cx, xs >= cx

    quadrants = {
        "left-top": upper & left,
        "right-top": upper & right,
        "right-bottom": lower & right,
        "left-bottom": lower & left,
    }
    missing = [name for name, member in quadrants.items() if not member.any()]
    if missing:
        raise ValueError(
            "cannot determine corners: no points in the "
            f"{', '.join(missing)} quadrant(s) around the centroid"
        )

    member = quadrants["left-top"]
    lt = xs[member].min(), ys[member].min()
    member = quadrants["right-top"]
    rt = xs[member].max(), ys[member].min()
    member = quadrants["right-bottom"]
    rb = xs[member].max(), ys[member].max()
    member = quadrants["left-bottom"]
    lb = xs[member].min(), ys[member].max()
    return lt, rt, rb, lb
def get_dst_mask(mask):
    """Return the distance transform of ``mask`` as a 3-channel 8-bit image.

    The mask is reduced to 2-D with ``_to_2d``, its L2 distance transform is
    computed with a mask size of 5, and the result is min-max normalized to
    0..255 before being stacked to three channels with ``_to_3d``.
    """
    distances = cv2.distanceTransform(_to_2d(mask), cv2.DIST_L2, 5)
    # Normalize the distance values to the 0..255 range.
    scaled = cv2.normalize(
        distances, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
    )
    return _to_3d(scaled)
def unwarp(img, src, dst):
    """Warp ``img`` with the perspective transform that maps ``src`` to ``dst``.

    Args:
        img: Image to warp; the output keeps its width and height.
        src: Four source points, passed to ``cv2.getPerspectiveTransform``.
        dst: Four destination points, passed to ``cv2.getPerspectiveTransform``.

    Returns:
        ``(warped, M)``: the warped image and the transform matrix.
    """
    height, width = img.shape[:2]
    # Transform matrix taking the src quadrilateral onto the dst one.
    matrix = cv2.getPerspectiveTransform(src, dst)
    # Apply it to obtain the top-down view, keeping the original canvas size.
    warped = cv2.warpPerspective(
        img, matrix, (width, height), flags=cv2.INTER_LINEAR
    )
    return warped, matrix
def _detect_corner_candidates(gray, method):
    """Return corner candidates of ``gray`` as an ``(N, 2)`` array of ``(y, x)`` rows.

    ``N`` is 0 when the selected detector finds nothing.
    """
    if method == PC_TYPE.HARRIS:
        response = cv2.cornerHarris(gray, 5, 3, 0.04)  # (H, W) corner scores
        threshold = 0.005 * response.max()
        return np.argwhere(response > threshold)

    if method == PC_TYPE.EDGES_CONTOURS:
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)
        contours, _ = cv2.findContours(
            edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        contour_points = [point for contour in contours for point in contour]
        # Contour points are (x, y); flip to (y, x).
        return np.array(contour_points).reshape(-1, 2)[..., ::-1]

    if method == PC_TYPE.GFTT:
        # maxCorners=0 asks OpenCV for every detected corner (no limit).
        corners = cv2.goodFeaturesToTrack(
            gray, 0, 0.01, 5, blockSize=3, useHarrisDetector=True, k=0.03
        )
        if corners is None:  # returned when nothing is detected
            return np.empty((0, 2))
        # Points are (x, y); flip to (y, x).
        return corners.reshape(-1, 2)[..., ::-1]

    if method == PC_TYPE.FAST:
        th = 50
        keypoints = cv2.FastFeatureDetector_create(th).detect(gray)
    elif method == PC_TYPE.KAZE:
        keypoints = cv2.KAZE_create().detect(gray)
    else:
        raise ValueError(
            f"Invalid method: {method}. Expected one of {list(PC_TYPE)}."
        )
    # Keypoint.pt is (x, y); store as (y, x). reshape keeps (0, 2) when empty.
    return np.array([[kp.pt[1], kp.pt[0]] for kp in keypoints]).reshape(-1, 2)


def perspective_correction(img, src=None, vis=False, method: PC_TYPE = PC_TYPE.HARRIS):
    """Warp ``img`` so that a quadrilateral in it fills the whole frame.

    Args:
        img: Image of shape (H, W, C) with values 0~255.
        src: Optional source quadrilateral
            ``[[ltx, lty], [rtx, rty], [rbx, rby], [lbx, lby]]``. Any
            array-like is accepted; it is converted to a float32 array
            before being handed to OpenCV. When ``None`` the corners are
            estimated from ``img`` with ``method`` and ``get_corner``.
        vis: When truthy (and ``src`` is ``None``), the detected corner
            candidates are drawn and saved to ``vis_corner.png``.
        method: Corner-candidate detector, a ``PC_TYPE`` member. Only used
            when ``src`` is ``None``.

    Returns:
        The warped image, same size as ``img``.

    Raises:
        ValueError: If ``method`` is not a ``PC_TYPE`` member, or if no
            corner candidate is detected.

    Side effects:
        Always writes the result to ``cv_result.png`` in the working
        directory.
    """
    if src is None:
        if not isinstance(method, PC_TYPE):
            raise ValueError(
                f"Invalid method: {method}. Expected one of {list(PC_TYPE)}."
            )
        gray = _to_grayscale(img)
        corner_coords = _detect_corner_candidates(gray, method)
        if len(corner_coords) == 0:
            raise ValueError(
                f"No corner candidates were detected with method {method}."
            )
        if vis:
            view_img = img.copy()
            for y, x in corner_coords:
                cv2.circle(view_img, (int(x), int(y)), 3, (255, 0, 0), 2)
            save_image(view_img, path="vis_corner.png")
        src = get_corner(corner_coords)

    # cv2.getPerspectiveTransform needs float32 point arrays; this also makes
    # the documented list form of ``src`` work.
    src = np.asarray(src, dtype=np.float32)
    last_x = img.shape[1] - 1
    last_y = img.shape[0] - 1
    dst = np.float32(
        [
            (0, 0),
            (last_x, 0),
            (last_x, last_y),
            (0, last_y),
        ]
    )
    result, _ = unwarp(img, src, dst)
    save_image(result, path="cv_result.png")
    return result
if __name__ == "__main__":
    # Manual smoke run: load a sample image and run the perspective
    # correction with visualization enabled (writes vis_corner.png and
    # cv_result.png to the working directory).
    image_url = "https://d2reotjpatzlok.cloudfront.net/qr-place/item/QR_20240726_2441_2_LZ1ZFCT38HN7PPCEZR8H.jpg"
    loaded = load_image(image_url, with_byte=True, with_format=True)
    # The raw bytes and the format are returned as well but not needed here.
    img, _img_bytes, _img_format = loaded
    perspective_correction(img, vis=True)