rogerxavier
committed on
Commit
•
20d0048
1
Parent(s):
1e2d513
Create 2magiDialogCut.py
Browse files- 2magiDialogCut.py +122 -0
2magiDialogCut.py
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
from PIL import Image
|
5 |
+
import os
|
6 |
+
|
7 |
+
|
8 |
+
def getImgCoordinatesByMagi(image_path: str, timeout: float = 300.0) -> "list | None":
    """Ask the Magi FastAPI service for the text boxes found in an image.

    The image file is uploaded as the multipart field ``image`` to the
    ``/getCoordinates`` endpoint, and the ``"texts"`` entry of the JSON reply
    is returned.

    Args:
        image_path: Path of the image file, including its extension.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The value stored under ``"texts"`` in the response, or ``None`` when
        the request fails, the reply is not valid JSON, or it has no
        ``"texts"`` entry.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
    """
    magi_fastapi_base = 'https://rogerxavier-fastapi-t5-magi.hf.space/getCoordinates'

    # Only a plain Accept header is sent:
    # - the previous hard-coded session cookie (a short-lived JWT) must not
    #   live in source control, and the original note said the API works
    #   without it once the Space is public;
    # - 'authority' / 'method' / 'scheme' are HTTP/2 pseudo-headers, not real
    #   request headers;
    # - Accept-Encoding is left to requests so that it only advertises
    #   encodings it can actually decode.
    headers = {'Accept': '*/*'}

    # Read the file up front so the handle is closed before the network call.
    with open(image_path, 'rb') as file:
        image_bytes = file.read()
    files = {"image": image_bytes}

    try:
        resp = requests.post(
            magi_fastapi_base, files=files, headers=headers, timeout=timeout
        )
        resp.raise_for_status()
        payload = resp.json()  # parse once, reuse for logging and the result
        print(payload)
        return payload["texts"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network/HTTP errors, a non-JSON body, or an unexpected payload shape.
        print(e)
        return None
|
40 |
+
|
41 |
+
|
42 |
+
def path_to_format(old_path: str) -> str:
    """Zero-pad the number in a file name to at least three digits.

    Example: ``/path/to/your/image/1.jpg`` -> ``/path/to/your/image/001.jpg``.

    All digits of the base name (extension excluded) are concatenated and
    read as one integer, which is then formatted with ``{:03d}``. The
    replacement only takes effect when those digits form one contiguous run
    in the name; a name such as ``a1b2`` is returned unchanged.

    Args:
        old_path: File path whose base name should be normalised.

    Returns:
        The path with the padded file name. A file name without any digit is
        returned as-is instead of raising ``ValueError``.
    """
    directory, base_name = os.path.split(old_path)
    stem, extension = os.path.splitext(base_name)

    number_str = ''.join(filter(str.isdigit, stem))
    if number_str:
        # Guarded: int('') would raise for names such as "cover.jpg".
        formatted_number = "{:03d}".format(int(number_str))
        stem = stem.replace(number_str, formatted_number)

    return os.path.join(directory, f"{stem}{extension}")
|
55 |
+
|
56 |
+
|
57 |
+
def save_img(new_save_path: str, old_img_path: str) -> None:
    """Crop every detected text box out of an image and save each crop as JPEG.

    The boxes come from ``getImgCoordinatesByMagi``. Crop number ``index`` is
    written to ``<new_save_path without extension>_<index:03d>_cropped.jpg``,
    so ``000.jpg`` yields ``000_000_cropped.jpg``, ``000_001_cropped.jpg``, ...

    Args:
        new_save_path: Destination path (with extension) used as the naming
            base for the crops.
        old_img_path: Path (with extension) of the source image. It is passed
            in so that a source the service could not process can be deleted.

    Returns:
        None. When detection returns ``None`` the source image is removed
        from disk and nothing is saved.
    """
    print(new_save_path)

    # Detection runs before the image is opened, so no handle is held on the
    # file when it has to be deleted (an open file cannot be removed on
    # Windows).
    text_bboxes_for_all_images = getImgCoordinatesByMagi(old_img_path)
    if text_bboxes_for_all_images is None:
        print("图片识别有问题,准备删除")
        os.remove(old_img_path)
        return

    save_stem = os.path.splitext(new_save_path)[0]
    # Modes Pillow's JPEG writer accepts; anything else (RGBA, P, LA, ...)
    # raises OSError on save, which happens for many PNG inputs.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(old_img_path) as img:
        for index, box in enumerate(text_bboxes_for_all_images):
            cropped_img = img.crop(tuple(box))
            if cropped_img.mode not in jpeg_modes:
                cropped_img = cropped_img.convert("RGB")
            cropped_img.save(f"{save_stem}_{index:03d}_cropped.jpg")
|
88 |
+
|
89 |
+
|
90 |
+
if __name__ == '__main__':
    # Folder (relative to the current working directory) that holds the
    # watermark-free manga pages to be cropped.
    img_path = 'manga1'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Collect every .jpg/.png below that folder, sub-directories included,
    # as paths relative to the current working directory.
    image_files = []
    for root, dirs, files in os.walk(subdir_path):
        for file in files:
            if file.endswith((".jpg", ".png")):
                image_files.append(os.path.relpath(os.path.join(root, file)))

    # Output root sits next to the input folder; the "2" suffix marks the
    # result of processing stage 2.
    processed_subdir_path = os.path.join(os.path.dirname(subdir_path), f"{img_path}2")
    os.makedirs(processed_subdir_path, exist_ok=True)

    for img_file in image_files:
        # Mirror the source's relative directory layout under the output root.
        img_dir = os.path.dirname(img_file)
        new_img_dir = os.path.join(processed_subdir_path, img_dir)
        os.makedirs(new_img_dir, exist_ok=True)

        new_img_path = os.path.join(new_img_dir, os.path.basename(img_file))
        new_img_path = path_to_format(new_img_path)

        # An image counts as done once its first crop exists. Crops are
        # numbered from 000, so checking index 1 would re-process every image
        # that produced exactly one crop.
        first_crop_path = f"{os.path.splitext(new_img_path)[0]}_{0:03d}_cropped.jpg"
        if os.path.exists(first_crop_path):
            print(f"Skipping {new_img_path} as it already exists.")
            continue

        save_img(new_save_path=new_img_path, old_img_path=img_file)
|