esun-choi committed on
Commit
8804c8f
1 Parent(s): fab8eeb

Initial Commit

__pycache__/craft.cpython-310.pyc ADDED
Binary file (2.38 kB)

__pycache__/craft_utils.cpython-310.pyc ADDED
Binary file (5.68 kB)

__pycache__/file_utils.cpython-310.pyc ADDED
Binary file (2.49 kB)

__pycache__/imgproc.cpython-310.pyc ADDED
Binary file (2.08 kB)

__pycache__/mosaik.cpython-310.pyc ADDED
Binary file (698 Bytes)

__pycache__/ner.cpython-310.pyc ADDED
Binary file (906 Bytes)

__pycache__/recognize.cpython-310.pyc ADDED
Binary file (716 Bytes)

__pycache__/refinenet.cpython-310.pyc ADDED
Binary file (1.93 kB)

__pycache__/seg.cpython-310.pyc ADDED
Binary file (1.5 kB)

__pycache__/seg2.cpython-310.pyc ADDED
Binary file (1.27 kB)

basenet/__init__.py ADDED
File without changes
basenet/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (164 Bytes)

basenet/__pycache__/vgg16_bn.cpython-310.pyc ADDED
Binary file (2.27 kB)

basenet/vgg16_bn.py ADDED
@@ -0,0 +1,72 @@
from collections import namedtuple

import torch
import torch.nn as nn
import torch.nn.init as init
from torchvision import models

def init_weights(modules):
    for m in modules:
        if isinstance(m, nn.Conv2d):
            init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)
            m.bias.data.zero_()

class vgg16_bn(torch.nn.Module):
    def __init__(self, pretrained=True, freeze=True):
        super(vgg16_bn, self).__init__()

        vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        for x in range(12):  # conv2_2
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(12, 19):  # conv3_3
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(19, 29):  # conv4_3
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(29, 39):  # conv5_3
            self.slice4.add_module(str(x), vgg_pretrained_features[x])

        # fc6, fc7 without atrous conv
        self.slice5 = torch.nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
            nn.Conv2d(1024, 1024, kernel_size=1)
        )

        if not pretrained:
            init_weights(self.slice1.modules())
            init_weights(self.slice2.modules())
            init_weights(self.slice3.modules())
            init_weights(self.slice4.modules())

        init_weights(self.slice5.modules())  # no pretrained model for fc6 and fc7

        if freeze:
            for param in self.slice1.parameters():  # only first conv
                param.requires_grad = False

    def forward(self, X):
        h = self.slice1(X)
        h_relu2_2 = h
        h = self.slice2(h)
        h_relu3_2 = h
        h = self.slice3(h)
        h_relu4_3 = h
        h = self.slice4(h)
        h_relu5_3 = h
        h = self.slice5(h)
        h_fc7 = h
        vgg_outputs = namedtuple("VggOutputs", ['fc7', 'relu5_3', 'relu4_3', 'relu3_2', 'relu2_2'])
        out = vgg_outputs(h_fc7, h_relu5_3, h_relu4_3, h_relu3_2, h_relu2_2)
        return out
craft.py ADDED
@@ -0,0 +1,76 @@

import torch
import torch.nn as nn
import torch.nn.functional as F

from basenet.vgg16_bn import vgg16_bn, init_weights

class double_conv(nn.Module):
    def __init__(self, in_ch, mid_ch, out_ch):
        super(double_conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch + mid_ch, mid_ch, kernel_size=1),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x


class CRAFT(nn.Module):
    def __init__(self, pretrained=False, freeze=False):
        super(CRAFT, self).__init__()

        """ Base network """
        self.basenet = vgg16_bn(pretrained, freeze)

        """ U network """
        self.upconv1 = double_conv(1024, 512, 256)
        self.upconv2 = double_conv(512, 256, 128)
        self.upconv3 = double_conv(256, 128, 64)
        self.upconv4 = double_conv(128, 64, 32)

        num_class = 2
        self.conv_cls = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 16, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=1), nn.ReLU(inplace=True),
            nn.Conv2d(16, num_class, kernel_size=1),
        )

        init_weights(self.upconv1.modules())
        init_weights(self.upconv2.modules())
        init_weights(self.upconv3.modules())
        init_weights(self.upconv4.modules())
        init_weights(self.conv_cls.modules())

    def forward(self, x):
        """ Base network """
        sources = self.basenet(x)

        """ U network """
        y = torch.cat([sources[0], sources[1]], dim=1)
        y = self.upconv1(y)

        y = F.interpolate(y, size=sources[2].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[2]], dim=1)
        y = self.upconv2(y)

        y = F.interpolate(y, size=sources[3].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[3]], dim=1)
        y = self.upconv3(y)

        y = F.interpolate(y, size=sources[4].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[4]], dim=1)
        feature = self.upconv4(y)

        y = self.conv_cls(feature)

        return y.permute(0,2,3,1), feature
craft_utils.py ADDED
@@ -0,0 +1,217 @@

import numpy as np
import cv2
import math

def warpCoord(Minv, pt):
    out = np.matmul(Minv, (pt[0], pt[1], 1))
    return np.array([out[0]/out[2], out[1]/out[2]])


def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape

    ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
    ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)

    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8), connectivity=4)

    det = []
    mapper = []
    for k in range(1, nLabels):
        size = stats[k, cv2.CC_STAT_AREA]
        if size < 10: continue

        if np.max(textmap[labels==k]) < text_threshold: continue

        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[labels==k] = 255
        segmap[np.logical_and(link_score==1, text_score==0)] = 0
        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
        sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
        if sx < 0 : sx = 0
        if sy < 0 : sy = 0
        if ex >= img_w: ex = img_w
        if ey >= img_h: ey = img_h
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

        np_contours = np.roll(np.array(np.where(segmap!=0)), 1, axis=0).transpose().reshape(-1, 2)
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            l, r = min(np_contours[:,0]), max(np_contours[:,0])
            t, b = min(np_contours[:,1]), max(np_contours[:,1])
            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)

        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4-startidx, 0)
        box = np.array(box)

        det.append(box)
        mapper.append(k)

    return det, labels, mapper

def getPoly_core(boxes, labels, mapper, linkmap):
    num_cp = 5
    max_len_ratio = 0.7
    expand_ratio = 1.45
    max_r = 2.0
    step_r = 0.2

    polys = []
    for k, box in enumerate(boxes):
        w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
        if w < 10 or h < 10:
            polys.append(None); continue

        tar = np.float32([[0,0],[w,0],[w,h],[0,h]])
        M = cv2.getPerspectiveTransform(box, tar)
        word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
        try:
            Minv = np.linalg.inv(M)
        except:
            polys.append(None); continue

        cur_label = mapper[k]
        word_label[word_label != cur_label] = 0
        word_label[word_label > 0] = 1

        cp = []
        max_len = -1
        for i in range(w):
            region = np.where(word_label[:,i] != 0)[0]
            if len(region) < 2 : continue
            cp.append((i, region[0], region[-1]))
            length = region[-1] - region[0] + 1
            if length > max_len: max_len = length

        if h * max_len_ratio < max_len:
            polys.append(None); continue

        tot_seg = num_cp * 2 + 1
        seg_w = w / tot_seg
        pp = [None] * num_cp
        cp_section = [[0, 0]] * tot_seg
        seg_height = [0] * num_cp
        seg_num = 0
        num_sec = 0
        prev_h = -1
        for i in range(0, len(cp)):
            (x, sy, ey) = cp[i]
            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
                # average previous segment
                if num_sec == 0: break
                cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
                num_sec = 0

                # reset variables
                seg_num += 1
                prev_h = -1

            # accumulate center points
            cy = (sy + ey) * 0.5
            cur_h = ey - sy + 1
            cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
            num_sec += 1

            if seg_num % 2 == 0: continue  # No polygon area

            if prev_h < cur_h:
                pp[int((seg_num - 1)/2)] = (x, cy)
                seg_height[int((seg_num - 1)/2)] = cur_h
                prev_h = cur_h

        # processing last segment
        if num_sec != 0:
            cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]

        # pass if num of pivots is not sufficient or segment width is smaller than character height
        if None in pp or seg_w < np.max(seg_height) * 0.25:
            polys.append(None); continue

        # calc median maximum of pivot points
        half_char_h = np.median(seg_height) * expand_ratio / 2

        # calc gradient and apply to make horizontal pivots
        new_pp = []
        for i, (x, cy) in enumerate(pp):
            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
            if dx == 0:  # gradient is zero
                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
                continue
            rad = - math.atan2(dy, dx)
            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
            new_pp.append([x - s, cy - c, x + s, cy + c])

        # get edge points to cover character heatmaps
        isSppFound, isEppFound = False, False
        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
        for r in np.arange(0.5, max_r, step_r):
            dx = 2 * half_char_h * r
            if not isSppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_s * dx
                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    spp = p
                    isSppFound = True
            if not isEppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_e * dx
                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    epp = p
                    isEppFound = True
            if isSppFound and isEppFound:
                break

        if not (isSppFound and isEppFound):
            polys.append(None); continue

        poly = []
        poly.append(warpCoord(Minv, (spp[0], spp[1])))
        for p in new_pp:
            poly.append(warpCoord(Minv, (p[0], p[1])))
        poly.append(warpCoord(Minv, (epp[0], epp[1])))
        poly.append(warpCoord(Minv, (epp[2], epp[3])))
        for p in reversed(new_pp):
            poly.append(warpCoord(Minv, (p[2], p[3])))
        poly.append(warpCoord(Minv, (spp[2], spp[3])))

        # add to final result
        polys.append(np.array(poly))

    return polys

def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
    boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)

    if poly:
        polys = getPoly_core(boxes, labels, mapper, linkmap)
    else:
        polys = [None] * len(boxes)

    return boxes, polys

def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2):
    if len(polys) > 0:
        polys = np.array(polys)
        for k in range(len(polys)):
            if polys[k] is not None:
                polys[k] *= (ratio_w * ratio_net, ratio_h * ratio_net)
    return polys
dino2/__pycache__/model.cpython-310.pyc ADDED
Binary file (2.85 kB)
 
dino2/model.py ADDED
@@ -0,0 +1,93 @@
import torch
import torch.nn as nn
from torch.hub import load
import torchvision.models as models


dino_backbones = {
    'dinov2_s': {
        'name': 'dinov2_vits14',
        'embedding_size': 384,
        'patch_size': 14
    },
    'dinov2_b': {
        'name': 'dinov2_vitb14',
        'embedding_size': 768,
        'patch_size': 14
    },
    'dinov2_l': {
        'name': 'dinov2_vitl14',
        'embedding_size': 1024,
        'patch_size': 14
    },
    'dinov2_g': {
        'name': 'dinov2_vitg14',
        'embedding_size': 1536,
        'patch_size': 14
    },
}


class linear_head(nn.Module):
    def __init__(self, embedding_size=384, num_classes=5):
        super(linear_head, self).__init__()
        self.fc = nn.Linear(embedding_size, num_classes)

    def forward(self, x):
        return self.fc(x)


class conv_head(nn.Module):
    def __init__(self, embedding_size=384, num_classes=5):
        super(conv_head, self).__init__()
        self.segmentation_conv = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2d(embedding_size, 64, (3,3), padding=(1,1)),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(64, num_classes, (3,3), padding=(1,1)),
        )

    def forward(self, x):
        x = self.segmentation_conv(x)
        x = torch.sigmoid(x)
        return x


def threshold_mask(predicted, threshold=0.55):
    thresholded_mask = (predicted > threshold).float()
    return thresholded_mask


class Segmentor(nn.Module):
    def __init__(self, device, num_classes, backbone='dinov2_s', head='conv', backbones=dino_backbones):
        super(Segmentor, self).__init__()
        self.heads = {
            'conv': conv_head
        }
        self.backbones = dino_backbones
        self.backbone = load('facebookresearch/dinov2', self.backbones[backbone]['name'])

        self.backbone.eval()
        self.num_classes = num_classes
        self.embedding_size = self.backbones[backbone]['embedding_size']
        self.patch_size = self.backbones[backbone]['patch_size']
        self.head = self.heads[head](self.embedding_size, self.num_classes)
        self.device = device

    def forward(self, x):
        batch_size = x.shape[0]
        mask_dim = (x.shape[2] / self.patch_size, x.shape[3] / self.patch_size)
        x = self.backbone.forward_features(x.to(self.device))

        x = x['x_norm_patchtokens']
        x = x.permute(0, 2, 1)
        x = x.reshape(batch_size, self.embedding_size, int(mask_dim[0]), int(mask_dim[1]))
        x = self.head(x)
        return x
file_utils.py ADDED
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
import os
import numpy as np
import cv2
import imgproc
from mosaik import mosaik

# borrowed from https://github.com/lengstrom/fast-style-transfer/blob/master/src/utils.py
def get_files(img_dir):
    imgs, masks, xmls = list_files(img_dir)
    return imgs, masks, xmls

def list_files(in_path):
    img_files = []
    mask_files = []
    gt_files = []
    for (dirpath, dirnames, filenames) in os.walk(in_path):
        for file in filenames:
            filename, ext = os.path.splitext(file)
            ext = str.lower(ext)
            if ext == '.jpg' or ext == '.jpeg' or ext == '.gif' or ext == '.png' or ext == '.pgm':
                img_files.append(os.path.join(dirpath, file))
            elif ext == '.bmp':
                mask_files.append(os.path.join(dirpath, file))
            elif ext == '.xml' or ext == '.gt' or ext == '.txt':
                gt_files.append(os.path.join(dirpath, file))
            elif ext == '.zip':
                continue
    # img_files.sort()
    # mask_files.sort()
    # gt_files.sort()
    return img_files, mask_files, gt_files

def saveResult(img_file, img, boxes, dirname='./result/', verticals=None, texts=None):
    """ Save text detection results one by one.
    Args:
        img_file (str): image file name
        img (array): raw image context
        boxes (array): array of result boxes
            Shape: [num_detections, 4] for BB output / [num_detections, 4] for QUAD output
    Return:
        None
    """
    img = np.array(img)

    # make result file list
    filename, file_ext = os.path.splitext(os.path.basename(img_file))

    # result directory
    res_file = dirname + "res_" + filename + '.txt'
    res_img_file = dirname + "res_" + filename + '.jpg'

    if not os.path.isdir(dirname):
        os.mkdir(dirname)

    with open(res_file, 'w') as f:
        for i, box in enumerate(boxes):
            poly = np.array(box).astype(np.int32).reshape((-1))
            strResult = ','.join([str(p) for p in poly]) + '\r\n'
            f.write(strResult)

            poly = poly.reshape(-1, 2)
            cv2.polylines(img, [poly.reshape((-1, 1, 2))], True, color=(0, 0, 255), thickness=2)
            ptColor = (0, 255, 255)
            if verticals is not None:
                if verticals[i]:
                    ptColor = (255, 0, 0)

            if texts is not None:
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.5
                cv2.putText(img, "{}".format(texts[i]), (poly[0][0]+1, poly[0][1]+1), font, font_scale, (0, 0, 0), thickness=1)
                cv2.putText(img, "{}".format(texts[i]), tuple(poly[0]), font, font_scale, (0, 255, 255), thickness=1)

    # Save result image
    cv2.imwrite(res_img_file, img)
    return img
imgproc.py ADDED
@@ -0,0 +1,70 @@
"""
Copyright (c) 2019-present NAVER Corp.
MIT License
"""

# -*- coding: utf-8 -*-
import numpy as np
from skimage import io
import cv2

def loadImage(img_file):
    img = io.imread(img_file)  # RGB order
    if img.shape[0] == 2: img = img[0]
    if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    if img.shape[2] == 4: img = img[:,:,:3]
    img = np.array(img)

    return img

def normalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
    # should be RGB order
    img = in_img.copy().astype(np.float32)

    img -= np.array([mean[0] * 255.0, mean[1] * 255.0, mean[2] * 255.0], dtype=np.float32)
    img /= np.array([variance[0] * 255.0, variance[1] * 255.0, variance[2] * 255.0], dtype=np.float32)
    return img

def denormalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
    # should be RGB order
    img = in_img.copy()
    img *= variance
    img += mean
    img *= 255.0
    img = np.clip(img, 0, 255).astype(np.uint8)
    return img

def resize_aspect_ratio(img, square_size, interpolation, mag_ratio=1):
    height, width, channel = img.shape

    # magnify image size
    target_size = mag_ratio * max(height, width)

    # set original image size
    if target_size > square_size:
        target_size = square_size

    ratio = target_size / max(height, width)

    target_h, target_w = int(height * ratio), int(width * ratio)
    proc = cv2.resize(img, (target_w, target_h), interpolation=interpolation)

    # make canvas and paste image
    target_h32, target_w32 = target_h, target_w
    if target_h % 32 != 0:
        target_h32 = target_h + (32 - target_h % 32)
    if target_w % 32 != 0:
        target_w32 = target_w + (32 - target_w % 32)
    resized = np.zeros((target_h32, target_w32, channel), dtype=np.float32)
    resized[0:target_h, 0:target_w, :] = proc
    target_h, target_w = target_h32, target_w32

    size_heatmap = (int(target_w/2), int(target_h/2))

    return resized, ratio, size_heatmap

def cvt2HeatmapImg(img):
    img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
    img = cv2.applyColorMap(img, cv2.COLORMAP_JET)
    return img
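For reference (not part of the commit itself), a minimal sketch of how the helpers above are combined before CRAFT inference; the file name, canvas size and magnification ratio simply mirror the defaults used in main.py:

    import cv2
    import imgproc

    img = imgproc.loadImage("input/1.png")  # RGB, H x W x 3
    resized, ratio, size_heatmap = imgproc.resize_aspect_ratio(
        img, square_size=1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
    x = imgproc.normalizeMeanVariance(resized)  # float32, ImageNet mean/variance scaled by 255
    # x.transpose(2, 0, 1) with a batch dimension is what test_net() in main.py feeds to CRAFT;
    # size_heatmap is half the padded resolution because the score maps come out at 1/2 scale.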
input/1.png ADDED
input/2.png ADDED
input/3.png ADDED
input/4.png ADDED
install.sh ADDED
@@ -0,0 +1 @@
pip install -r requirements.txt
main.py ADDED
@@ -0,0 +1,354 @@
from recognize import recongize
from ner import ner
import os
import time
import argparse
from sr.sr import sr
import torch
from scipy.ndimage import gaussian_filter
from PIL import Image
import numpy as np
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from mosaik import mosaik
from PIL import Image
import cv2
from skimage import io
import numpy as np
import craft_utils
import imgproc
import file_utils
from seg import mask_percentage

from seg2 import dino_seg

from craft import CRAFT
from collections import OrderedDict
import gradio as gr
from refinenet import RefineNet


# Code that loads the CRAFT and refiner models
def copyStateDict(state_dict):
    if list(state_dict.keys())[0].startswith("module"):
        start_idx = 1
    else:
        start_idx = 0
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict

def str2bool(v):
    return v.lower() in ("yes", "y", "true", "t", "1")

parser = argparse.ArgumentParser(description='CRAFT Text Detection')
parser.add_argument('--trained_model', default='weights/craft_mlt_25k.pth', type=str, help='pretrained CRAFT model')
parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold')
parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda for inference')
parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
parser.add_argument('--refine', default=True, help='enable link refiner')
parser.add_argument('--image_path', default="input/2.png", help='input image')
parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str, help='pretrained refiner model')

args = parser.parse_args()

# The function below is optional.
def full_img_masking(full_image,net,refine_net):
    reference_image=sr(full_image)
    reference_boxes=text_detect(reference_image,net=net,refine_net=refine_net)
    boxes=get_box_from_refer(reference_boxes)
    for index2,box in enumerate(boxes):
        xmin,xmax,ymin,ymax=get_min_max(box)

        text_area=full_image[int(ymin):int(ymax),int(xmin):int(xmax),:]

        text=recongize(text_area)
        label=ner(text)

        if label==1:
            A=full_image[int(ymin):int(ymax),int(xmin):int(xmax),:]
            full_image[int(ymin):int(ymax),int(xmin):int(xmax),:] = gaussian_filter(A, sigma=16)
    return full_image

def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    t0 = time.time()

    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))
    if cuda:
        x = x.cuda()

    with torch.no_grad():
        y, feature = net(x)

    score_text = y[0,:,:,0].cpu().data.numpy()
    score_link = y[0,:,:,1].cpu().data.numpy()

    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0,:,:,0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    return boxes, polys, ret_score_text

def text_detect(image,net,refine_net):

    bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net)

    return bboxes


def get_box_from_refer(reference_boxes):

    real_boxes=[]
    for box in reference_boxes:

        real_boxes.append(box//2)

    return real_boxes

def get_min_max(box):
    xlist=[]
    ylist=[]
    for coor in box:
        xlist.append(coor[0])
        ylist.append(coor[1])
    return min(xlist),max(xlist),min(ylist),max(ylist)

def main(image_path0):
    # Step 1

    # ==> Load the CRAFT and RefineNet models and move them onto the CUDA device.

    net = CRAFT()
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(args.trained_model)))

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = False

    net.eval()

    refine_net = None
    if args.refine:
        refine_net = RefineNet()
        if args.cuda:
            refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
            refine_net = refine_net.cuda()

        refine_net.eval()
        args.poly = True

    # Step 2

    # The image dropped into the Gradio input field is passed in here as A.

    A=image_path0
    image_list=[]
    image_list.append(A)
    for k, image_path in enumerate(image_list):

        image = imgproc.loadImage(image_path)
        if image.shape[2]>3:
            image=image[:,:,0:3]

        original_image=image
        # Segment only the invoice region of the image with the DINOv2 model.

        output=dino_seg(image)
        image3=Image.fromarray(output)
        image3.save("temporal_mask/mask.png")

        # A mask image is produced (white pixels on a black background).
        # From that mask, compute what percentage of the whole image each blob (assumed invoice) occupies.
        contours_list,percentage_list=mask_percentage("temporal_mask/mask.png")

        normal_image_list=[]

        small_coordinate_list=[]
        original_coordinate_list=[]


        # Step 3

        sorted_list = sorted(percentage_list, reverse=True)
        top_5 = sorted_list[:5]
        print("Top 5 values:", top_5)
        # percentage_list holds the area percentages of the blobs assumed to be invoices,
        # and contours_list holds the corresponding blobs in the same order.
        # e.g. the first element of percentage_list is the percentage of the first element of contours_list.

        for index,percentage in enumerate(percentage_list):

            if 5<percentage:

                # Blobs that occupy more than 5% of the image go into the normal list.
                # The normal list collects blobs (assumed invoices) that are large enough in the image.
                # Blobs between 1% and 5% go into small_coordinate_list and are treated as very small blobs.
                # For very small blobs the text is barely readable even when zoomed in, so the whole blob is mosaicked.
                # Blobs under 1% are close to vanishing and are skipped.

                contour=contours_list[index]

                x_list=[]
                y_list=[]
                contour2=list(contour)

                for r in contour2:
                    r2=r[0]
                    x_list.append(r2[0])
                    y_list.append(r2[1])
                x_min=min(x_list)
                y_min=min(y_list)
                x_max=max(x_list)
                y_max=max(y_list)
                original_coordinate_list.append([y_min,y_max,x_min,x_max])
                image2=original_image[y_min:y_max,x_min:x_max,:]
                normal_image_list.append(image2)

            elif 1<percentage<5:
                contour=contours_list[index]

                x_list=[]
                y_list=[]
                contour2=list(contour)

                for r in contour2:
                    r2=r[0]
                    x_list.append(r2[0])
                    y_list.append(r2[1])
                x_min=min(x_list)
                y_min=min(y_list)
                x_max=max(x_list)
                y_max=max(y_list)
                small_coordinate_list.append([y_min,y_max,x_min,x_max])  # coordinates of invoices under 5%
            else:
                continue


        # Step 4 (very small invoices)

        # small_coordinate_list gathers the very small invoices; if it is empty, go straight to Step 5.
        # Otherwise (at least one element), mosaik() blurs every coordinate range in the full image
        # that belongs to a small blob.

        if len(small_coordinate_list)>0:
            original_image=mosaik(original_image,small_coordinate_list)
        else:
            pass

        # Step 5 (reasonably sized invoices) ==> normal list

        # normal_image_list holds invoices of adequate size (text becomes readable when zoomed in).
        # Each cropped invoice is upscaled with ESRGAN so that CRAFT can return text locations reliably.
        # The upscaled invoice goes through CRAFT to obtain all text coordinates accurately.
        # The coordinates are not returned at the upscaled resolution; they are rescaled (//2)
        # back to the original invoice image to obtain the final coordinates.

        for index,normal_image in enumerate(normal_image_list):
            reference_image=sr(normal_image)
            reference_boxes=text_detect(reference_image,net=net,refine_net=refine_net)
            boxes=get_box_from_refer(reference_boxes)
            for index2,box in enumerate(boxes):
                xmin,xmax,ymin,ymax=get_min_max(box)

                text_area=normal_image[int(ymin):int(ymax),int(xmin):int(xmax),:]
                text_area=Image.fromarray(text_area)
                os.makedirs("text_area",exist_ok=True)
                text_area.save(f"text_area/new_{index2+1}.png")


                # Step 6 (text recognition, NER)

                # Using the coordinates above, boxes are cropped inside the invoice.
                # Each cropped region (a box assumed to contain text) is passed to TrOCR.
                # TrOCR returns the text it reads inside the box.
                # The text is passed to KoELECTRA to decide whether it is personal information.
                # If the box is judged to contain personal information (label 1), it is mosaicked:
                # the box coordinates are converted to the invoice image's coordinates and that
                # region is blurred.
                # The partially mosaicked invoice is then pasted back into the full image.

                text=recongize(text_area)
                label=ner(text)
                with open("output/text_recongnize.txt","a") as recognized:
                    recognized.writelines(str(index2+1))
                    recognized.writelines(" ")
                    recognized.writelines(str(text))
                    recognized.writelines(" ")
                    recognized.writelines(str(label))
                    recognized.writelines("\n")
                    recognized.close()
                print("done")
                if label==1:
                    A=normal_image[int(ymin):int(ymax),int(xmin):int(xmax),:]
                    normal_image[int(ymin):int(ymax),int(xmin):int(xmax),:] = gaussian_filter(A, sigma=16)

                else:
                    pass
            a,b,c,d=original_coordinate_list[index]
            original_image[a:b,c:d,:]=normal_image

        # To push accuracy further, the whole image (invoice plus background) can also be run
        # through CRAFT in one pass; this is optional (downside: inference speed).

        #print("full mask start")
        #original_image=full_img_masking(original_image,net=net,refine_net=refine_net)
        #print("full mask done")


        original_image=Image.fromarray(original_image)
        original_image.save("output/mosaiked.png")
        print("masked complete")
        return original_image


if __name__ == '__main__':

    iface = gr.Interface(
        fn=main,
        inputs=gr.Image(type="filepath", label="Invoice Image"),
        outputs=gr.Image(type="pil", label="Masked Invoice Image"),
        live=True
    )

    iface.launch()
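By default the script launches the Gradio interface above. As a hedged sketch, the same pipeline can be driven headlessly, assuming the weights referenced by the argparse defaults are present and a CUDA device is available (note that argparse runs at import time, so importing main.py from another script that has its own command-line flags may need care):

    from main import main

    masked = main("input/2.png")            # PIL.Image with personal information blurred
    masked.save("output/mosaiked_copy.png") # main() also writes output/mosaiked.png itself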
mosaik.py ADDED
@@ -0,0 +1,32 @@
import numpy as np
from PIL import Image
import cv2
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon



# Masks the whole cropped region for blobs that take up between 1% and 5% of the image.
def mosaik(img, bboxes):
    for box in bboxes:
        # box = [y_min, y_max, x_min, x_max]
        cropped = img[box[0]:box[1], box[2]:box[3], :]

        cropped = np.array(cropped)
        cropped = gaussian_filter(cropped, sigma=16)
        img[box[0]:box[1], box[2]:box[3], :] = cropped

    return img
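A small usage sketch with illustrative coordinates; each box follows the [y_min, y_max, x_min, x_max] convention noted above, and the blur is applied in place on the numpy array:

    import numpy as np
    from mosaik import mosaik

    img = np.zeros((400, 600, 3), dtype=np.uint8)   # stand-in for a loaded photo
    blurred = mosaik(img, [[50, 120, 200, 320]])    # that rectangle is Gaussian-blurred (sigma=16)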
ner.py ADDED
@@ -0,0 +1,106 @@
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
from collections import defaultdict
import torch

device = torch.device("cuda")
tokenizer = AutoTokenizer.from_pretrained("Leo97/KoELECTRA-small-v3-modu-ner")
model = AutoModelForTokenClassification.from_pretrained("Leo97/KoELECTRA-small-v3-modu-ner")
model.to(device)

# This step runs after the assumed invoice region has passed through CRAFT, the text regions
# have been cropped, and TrOCR has extracted the text from each region.
# It decides the class of the extracted text.
# If the text belongs to person name (PS), road/building name (AF) or address (LC),
# 1 is returned so that the region gets mosaicked afterwards.
# The NER model splits the text into tokens and returns a class for each token.
# Considering every token's class makes inference very slow, so 1 is returned as soon as
# at least one token belongs to the PS, AF or LC classes.

def check_entity(entities):
    for entity_info in entities:
        entity_value = entity_info.get('entity', '').upper()
        if 'LC' in entity_value or 'PS' in entity_value or 'AF' in entity_value:
            return 1
    return 0

def ner(example):
    ner = pipeline("ner", model=model, tokenizer=tokenizer, device=device)
    ner_results = ner(example)
    ner_results = check_entity(ner_results)
    return ner_results



# Option one
# def find_longest_value_key(input_dict):
#     max_length = 0
#     max_length_keys = []

#     for key, value in input_dict.items():
#         current_length = len(value)
#         if current_length > max_length:
#             max_length = current_length
#             max_length_keys = [key]
#         elif current_length == max_length:
#             max_length_keys.append(key)

#     if len(max_length_keys) == 1:
#         return 0
#     else:
#         return 1



# def find_longest_value_key2(input_dict):
#     if not input_dict:
#         return None

#     max_key = max(input_dict, key=lambda k: len(input_dict[k]))
#     return max_key


# def find_most_frequent_entity(entities):
#     entity_counts = defaultdict(list)

#     for item in entities:
#         split_entity = item['entity'].split('-')

#         entity_type = split_entity[1]
#         entity_counts[entity_type].append(item['score'])
#     number = find_longest_value_key(entity_counts)
#     if number == 1:
#         max_entities = []
#         max_score_average = -1

#         for entity, scores in entity_counts.items():
#             score_average = sum(scores) / len(scores)

#             if score_average > max_score_average:
#                 max_entities = [entity]
#                 max_score_average = score_average
#             elif score_average == max_score_average:
#                 max_entities.append(entity)
#         if len(max_entities) > 0:
#             return max_entities if len(max_entities) > 1 else max_entities[0]
#         else:
#             return "Do not mosaik"
#     else:
#         A = find_longest_value_key2(entity_counts)

#         return A




# If there is even a single PS or LC token, return PS/LC right away

# label = filtering(ner_results)
# if label.find("PS") > -1 or label.find("LC") > -1:
#     return 1
# else:
#     return 0
# print(ner("홍길동"))




# label = check_label(example)
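A hedged usage sketch: the sample string is hypothetical, a CUDA device is assumed (the module pins the model to cuda at import time), and the returned label depends entirely on the KoELECTRA predictions (1 if any token is tagged PS, AF or LC, otherwise 0):

    from ner import ner

    label = ner("홍길동 서울특별시 강남구")  # expected to be 1 for a name plus an address
    print(label)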
recognize.py ADDED
@@ -0,0 +1,18 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AutoTokenizer
import unicodedata

# The TrOCR weights were taken from Hugging Face, fine-tuned, and saved to the trocr_weight
# folder (the tokenizer and processor are saved there as well).
# The image passed to recongize() is a region cropped by CRAFT inside the invoice, i.e. a
# region assumed to contain text; the function extracts the text likely present in that region.


def recongize(img):
    processor = TrOCRProcessor.from_pretrained("trocr_weight")
    model = VisionEncoderDecoderModel.from_pretrained("trocr_weight")
    tokenizer = AutoTokenizer.from_pretrained("trocr_weight")

    pixel_values = processor(img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values, max_length=64)
    generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    generated_text = unicodedata.normalize("NFC", generated_text)
    return generated_text
refinenet.py ADDED
@@ -0,0 +1,65 @@
"""
Copyright (c) 2019-present NAVER Corp.
MIT License
"""

# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from basenet.vgg16_bn import init_weights


class RefineNet(nn.Module):
    def __init__(self):
        super(RefineNet, self).__init__()

        self.last_conv = nn.Sequential(
            nn.Conv2d(34, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True)
        )

        self.aspp1 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=6, padding=6), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=12, padding=12), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=18, padding=18), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=24, padding=24), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        init_weights(self.last_conv.modules())
        init_weights(self.aspp1.modules())
        init_weights(self.aspp2.modules())
        init_weights(self.aspp3.modules())
        init_weights(self.aspp4.modules())

    def forward(self, y, upconv4):
        refine = torch.cat([y.permute(0,3,1,2), upconv4], dim=1)
        refine = self.last_conv(refine)

        aspp1 = self.aspp1(refine)
        aspp2 = self.aspp2(refine)
        aspp3 = self.aspp3(refine)
        aspp4 = self.aspp4(refine)

        #out = torch.add([aspp1, aspp2, aspp3, aspp4], dim=1)
        out = aspp1 + aspp2 + aspp3 + aspp4
        return out.permute(0, 2, 3, 1)  # , refine.permute(0,2,3,1)
requirements.txt ADDED
@@ -0,0 +1,18 @@
numpy
torch
pillow
matplotlib
transformers
scipy
torchvision
opencv-python
scikit-image
gradio
# unicodedata, math, os and collections are part of the Python standard library
# and do not need to be installed with pip.
reset.sh ADDED
@@ -0,0 +1,15 @@
rm -rf output
mkdir output

rm -rf flagged

rm -rf temporal_mask
mkdir temporal_mask

rm -rf text_area
mkdir text_area

# Run after inference (cleans up the result folders).
seg.py ADDED
@@ -0,0 +1,46 @@
from unet.predict import predict_img, mask_to_image
import torch
from PIL import Image
import numpy as np
import cv2

# The function below is the U-Net model, which was tried before DINOv2.
def segmentation(img):
    device = torch.device("cuda")
    net = torch.load("weights/unet.pth")

    mask_values = [[0, 0, 0], [255, 255, 255]]

    mask = predict_img(net, img, device, scale_factor=1, out_threshold=0.5)
    result = mask_to_image(mask, mask_values)
    result = np.array(result)

    return result


# Computes what percentage of the image each region cropped by the segmentation above occupies.
# The function below is used with both DINOv2 and the U-Net.
# It measures, for each blob of connected white pixels, the fraction of the whole image it covers.
# It also works when there are two or more blobs (assumed invoices).
def mask_percentage(mask_path):

    image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    ret, threshold = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

    contours, hierarchy = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    total_area = image.shape[0] * image.shape[1]
    contours_list = contours

    contour_areas = [cv2.contourArea(contour) for contour in contours]

    percentages = [(area / total_area) * 100 for area in contour_areas]
    percentage_list = []
    for i, percentage in enumerate(percentages):
        percentage_list.append(percentage)
    return contours_list, percentage_list
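A usage sketch mirroring how main.py consumes mask_percentage; the mask path is the file main.py writes, and the 5% / 1% thresholds are the ones used there:

    from seg import mask_percentage

    contours, percentages = mask_percentage("temporal_mask/mask.png")
    big_blobs  = [c for c, p in zip(contours, percentages) if p > 5]      # treated as full invoices
    tiny_blobs = [c for c, p in zip(contours, percentages) if 1 < p < 5]  # mosaicked wholesale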
seg2.py ADDED
@@ -0,0 +1,68 @@
import torch
from dino2.model import Segmentor
from PIL import Image
from torchvision import transforms
import numpy as np
import cv2



T2 = transforms.ToPILImage()


weights = "/home/ksyint/other1213/craft_ku/weights/dinov2.pt"
device = torch.device("cpu")
model = Segmentor(device, 1, backbone='dinov2_b', head="conv")
model.load_state_dict(torch.load(weights, map_location="cpu"))
model = model.to(device)


img_transform = transforms.Compose([
    transforms.Resize((14*64, 14*64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Code for cropping out the invoice region.
# DINOv2 is used as the segmentation model; it was trained separately, the resulting weights
# were saved to the weights folder, and they are loaded as shown above.

def dino_seg(numpy_array):
    img0 = Image.fromarray(numpy_array)
    original_size = img0.size
    img = img_transform(img0)
    a = img.unsqueeze(0)
    b = model(a)
    b = b.squeeze(0)
    b = b * 255.0
    model_output = T2(b)  # PIL image
    model_output = model_output.resize(original_size)

    model_output = np.array(model_output)
    model_output[model_output > 220] = 255.0
    model_output[model_output <= 220] = 0.0
    model_output2 = model_output
    model_output3 = model_output
    output = np.stack([model_output, model_output2, model_output3])
    output = np.transpose(output, (1, 2, 0))

    return output




# def find_connected_components(image):

#     _, binary_image = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY)

#     _, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=4)

#     largest_component_index = np.argmax(stats[1:, cv2.CC_STAT_AREA]) + 1

#     x, y, w, h, _ = stats[largest_component_index, cv2.CC_STAT_LEFT:cv2.CC_STAT_TOP+cv2.CC_STAT_HEIGHT+1]

#     return x, y, w, h
sr/__pycache__/sr.cpython-310.pyc ADDED
Binary file (606 Bytes)
 
sr/esrgan ADDED
@@ -0,0 +1 @@
Subproject commit 362a0316878f41dbdfbb23657b450c3353de5acf
sr/sr.py ADDED
@@ -0,0 +1,15 @@
import torch
from PIL import Image
import numpy as np
from .esrgan.RealESRGAN import RealESRGAN

def sr(img):
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = torch.device("cuda")
    model = RealESRGAN(device, scale=2)
    model.load_weights('weights/RealESRGAN_x2.pth', download=True)

    img = Image.fromarray(img)
    sr_image = model.predict(img)
    sr_image = np.array(sr_image)
    return sr_image
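Usage sketch (assumes a CUDA device and the RealESRGAN_x2 weights under weights/); input and output are numpy arrays, with the output roughly twice the height and width:

    import numpy as np
    from sr.sr import sr

    crop = np.zeros((100, 200, 3), dtype=np.uint8)  # stand-in for a cropped invoice
    upscaled = sr(crop)                             # ~200 x 400 x 3 after 2x RealESRGAN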
unet/__pycache__/predict.cpython-310.pyc ADDED
Binary file (1.89 kB)
 
unet/dino/__init__.py ADDED
@@ -0,0 +1 @@
from .model import Dinov2
unet/dino/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (202 Bytes)

unet/dino/__pycache__/model.cpython-310.pyc ADDED
Binary file (1.63 kB)

unet/dino/__pycache__/modules.cpython-310.pyc ADDED
Binary file (2.58 kB)

unet/dino/__pycache__/parts.cpython-310.pyc ADDED
Binary file (2.58 kB)

unet/dino/model.py ADDED
@@ -0,0 +1,47 @@

from .parts import *


class Dinov2(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(Dinov2, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = (DoubleConv(n_channels, 64))
        self.down1 = (Down(64, 128))
        self.down2 = (Down(128, 256))
        self.down3 = (Down(256, 512))
        factor = 2 if bilinear else 1
        self.down4 = (Down(512, 1024 // factor))
        self.up1 = (Up(1024, 512 // factor, bilinear))
        self.up2 = (Up(512, 256 // factor, bilinear))
        self.up3 = (Up(256, 128 // factor, bilinear))
        self.up4 = (Up(128, 64, bilinear))
        self.outc = (OutConv(64, n_classes))

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

    def use_checkpointing(self):
        self.inc = torch.utils.checkpoint(self.inc)
        self.down1 = torch.utils.checkpoint(self.down1)
        self.down2 = torch.utils.checkpoint(self.down2)
        self.down3 = torch.utils.checkpoint(self.down3)
        self.down4 = torch.utils.checkpoint(self.down4)
        self.up1 = torch.utils.checkpoint(self.up1)
        self.up2 = torch.utils.checkpoint(self.up2)
        self.up3 = torch.utils.checkpoint(self.up3)
        self.up4 = torch.utils.checkpoint(self.up4)
        self.outc = torch.utils.checkpoint(self.outc)
unet/dino/parts.py ADDED
@@ -0,0 +1,67 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)
unet/predict.py ADDED
@@ -0,0 +1,76 @@

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image


def preprocess(mask_values, pil_img, scale, is_mask):
    pil_img = Image.fromarray(pil_img)
    w, h = pil_img.size
    newW, newH = int(scale * w), int(scale * h)
    pil_img = pil_img.resize((newW, newH))
    img = np.asarray(pil_img)

    if is_mask:
        mask = np.zeros((newH, newW), dtype=np.int64)
        for i, v in enumerate(mask_values):
            if img.ndim == 2:
                mask[img == v] = i
            else:
                mask[(img == v).all(-1)] = i

        return mask

    else:
        if img.ndim == 2:
            img = img[np.newaxis, ...]
        else:
            img = img.transpose((2, 0, 1))

        if (img > 1).any():
            img = img / 255.0

        return img

def predict_img(net,
                full_img,
                device,
                scale_factor=1,
                out_threshold=0.5):
    net.eval()
    img = torch.from_numpy(preprocess(None, full_img, scale_factor, is_mask=False))
    img = img.unsqueeze(0)
    img = img.to(device=device, dtype=torch.float32)

    with torch.no_grad():
        output = net(img).cpu()

        if net.n_classes > 1:
            mask = output.argmax(dim=1)
        else:
            mask = torch.sigmoid(output) > out_threshold

    return mask[0].long().squeeze().numpy()


def mask_to_image(mask: np.ndarray, mask_values):
    if isinstance(mask_values[0], list):
        out = np.zeros((mask.shape[-2], mask.shape[-1], len(mask_values[0])), dtype=np.uint8)
    elif mask_values == [0, 1]:
        out = np.zeros((mask.shape[-2], mask.shape[-1]), dtype=bool)
    else:
        out = np.zeros((mask.shape[-2], mask.shape[-1]), dtype=np.uint8)

    if mask.ndim == 3:
        mask = np.argmax(mask, axis=0)

    for i, v in enumerate(mask_values):
        out[mask == i] = v

    return Image.fromarray(out)