rogerxavier
committed on
Commit
•
371ebaa
1
Parent(s):
e8bc308
Update 3mergeDialogToVideo.py
Browse files- 3mergeDialogToVideo.py +202 -23
3mergeDialogToVideo.py
CHANGED
@@ -73,7 +73,136 @@ def get_associate_dialog(image_path:"图片路径(包含后缀)",dialog_cut_path
|
|
73 |
return associated_dialogs
|
74 |
|
75 |
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排序后的text list(一列或者几列话,反正是一个框的内容,几句不清楚,一个框的list当一次文案就行) 或者失败请求返回none":
|
78 |
image_bytes = open(image_path, 'rb')
|
79 |
headers = {
|
@@ -101,34 +230,83 @@ def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排
|
|
101 |
#先json转换,0为坐标list合集,1为 boxid和text合集
|
102 |
boxCoordinates , boxInfo = resp.json()[0],resp.json()[1] #分别是list和dict类型
|
103 |
print("ofa ocr识别漫画块成功返回")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
# 计算文本框的中心点,以便按照从右往左,从上往下的顺序进行排序
|
105 |
centers = [((box[0][0] + box[2][0]) / 2, (box[0][1] + box[2][1]) / 2) for box in boxCoordinates]
|
|
|
|
|
106 |
|
107 |
-
#
|
108 |
-
sorted_indices = sorted(range(len(centers)), key=lambda i: (-centers[i][0],
|
109 |
|
110 |
-
|
111 |
sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
|
112 |
-
sorted_text = [boxInfo['Text'][str(i)] for i in sorted_indices]
|
113 |
-
|
114 |
-
# 根据x方向偏差对小于1/3宽度之间的不同文本框进行重排
|
115 |
-
for i in range(len(sorted_indices) - 1):
|
116 |
-
if centers[sorted_indices[i]][0] - centers[sorted_indices[i+1]][0] < (sorted_coordinates[i][2][0] - sorted_coordinates[i][0][0]) / 3:
|
117 |
-
if sorted_coordinates[i][0][1] > sorted_coordinates[i+1][2][1]:
|
118 |
-
#if这里看y轴了
|
119 |
-
sorted_indices[i], sorted_indices[i+1] = sorted_indices[i+1], sorted_indices[i]
|
120 |
-
|
121 |
-
# 根据x方向和文本框宽度对大于一个标准宽度之间的不同文本框断句
|
122 |
-
for i in range(len(sorted_indices) - 1):
|
123 |
-
if centers[sorted_indices[i]][0] - centers[sorted_indices[i + 1]][0] > (
|
124 |
-
sorted_coordinates[i][2][0] - sorted_coordinates[i][0][0]) * 1.5:
|
125 |
-
# 如果相邻文本框的横坐标距离大于一个标准宽度的2/3,进行断句
|
126 |
-
sorted_text[i] += '\n'
|
127 |
-
|
128 |
-
# 在最后一个项末尾添加"\n",从而隔开其他的漫画块对话
|
129 |
-
sorted_text[-1] += '\n'
|
130 |
-
# 在最后一个项末尾添加"\n",从而隔开其他的漫画块对话
|
131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
|
133 |
print(sorted_coordinates)
|
134 |
print(sorted_text)
|
@@ -139,6 +317,7 @@ def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排
|
|
139 |
return None
|
140 |
|
141 |
|
|
|
142 |
#通过文字获取音频
|
143 |
def get_audio_data(text:str)-> "返回audio data io句柄, duration(也有可能包含无效字符导致生成音频400错误)":
|
144 |
# Creates an instance of a speech config with specified subscription key and service region.
|
|
|
73 |
return associated_dialogs
|
74 |
|
75 |
|
76 |
+
def merge_sublists(lists):
    """Merge sublists that share at least one element into single groups.

    Merging is transitive: if a sublist overlaps several groups that were
    previously separate, all of them are fused into one group (kept at the
    position of the earliest of those groups).

    Args:
        lists: iterable of lists whose elements support ``==`` comparison.

    Returns:
        A new list of new lists. Element order inside a group follows first
        appearance. The input sublists are never modified.
    """
    merged = []
    for sublist in lists:
        overlapping = [
            group for group in merged
            if any(elem in sublist for elem in group)
        ]
        if not overlapping:
            # Copy so that later merges never mutate the caller's list.
            merged.append(list(sublist))
            continue

        target = overlapping[0]
        absorbed = overlapping[1:]
        # The incoming sublist may bridge several groups; fold them all
        # into the first one so the result stays pairwise disjoint.
        for other in absorbed:
            for elem in other:
                if elem not in target:
                    target.append(elem)
        for elem in sublist:
            if elem not in target:
                target.append(elem)
        if absorbed:
            # Drop the absorbed groups by identity, not equality.
            merged = [
                group for group in merged
                if not any(group is other for other in absorbed)
            ]
    return merged
|
88 |
+
|
89 |
+
|
90 |
+
# 任意两框进行中心高度差和中心宽度差比较,如果xy都相近,那么认为是同一个框的对话,加入一个对话数组里面,
|
91 |
+
# 最终将漫画块分成几个对话框数组,然后再对数组间进行从上到下,从右到左排序
|
92 |
+
# 定义一个函数来寻找相关的点并加入新的list
|
93 |
+
def find_associate_text(sorted_indices,centers,sorted_coordinates,boxInfo):
    """Group text boxes whose centres are close enough to be one dialog.

    Two boxes at rank positions ``i < j`` are considered related when the
    vertical distance of their centres is below one third of box ``i``'s
    height AND the horizontal distance is below 1.5 times box ``i``'s width.
    Related boxes are collected into groups, and groups that become
    connected through a shared box are merged, so the returned groups are
    pairwise disjoint.

    Args:
        sorted_indices: original box indices in sorted order; position in
            this list is the "rank" used inside the groups.
        centers: ``(x, y)`` centre per original box index.
        sorted_coordinates: box corner lists aligned with ``sorted_indices``
            (entry ``k`` is the box at rank ``k``); corners ``[0]`` and
            ``[2]`` are read as opposite corners.
        boxInfo: mapping with a ``'Text'`` dict keyed by the original box
            index as a string.

    Returns:
        ``(associate_text_list, related_groups)``: a list of text lists (one
        per group, texts in ascending rank order) and the matching list of
        sets of rank positions.
    """
    related_groups = []
    count = len(sorted_indices)
    for i in range(count - 1):
        center_i = centers[sorted_indices[i]]
        box_i = sorted_coordinates[i]
        # Thresholds are derived from box i only, as in the pairwise rule.
        max_dy = abs(box_i[2][1] - box_i[0][1]) / 3
        max_dx = abs(box_i[2][0] - box_i[0][0]) * 1.5
        for j in range(i + 1, count):
            center_j = centers[sorted_indices[j]]
            if abs(center_i[1] - center_j[1]) >= max_dy:
                continue
            if abs(center_i[0] - center_j[0]) >= max_dx:
                continue

            touching = [g for g in related_groups if i in g or j in g]
            if not touching:
                related_groups.append({i, j})
                continue

            target = touching[0]
            target.update((i, j))
            absorbed = touching[1:]
            if absorbed:
                # i and j lived in different groups: fuse them so no rank
                # ends up in two groups at once.
                for other in absorbed:
                    target |= other
                related_groups = [
                    g for g in related_groups
                    if not any(g is other for other in absorbed)
                ]

    texts = boxInfo['Text']
    # Groups are already disjoint, so no text-based merging pass is needed;
    # sorted() makes the text order independent of set iteration order.
    associate_text_list = [
        [texts[str(sorted_indices[rank])] for rank in sorted(group)]
        for group in related_groups
    ]
    return associate_text_list, related_groups
|
122 |
+
|
123 |
+
|
124 |
+
|
125 |
+
#先对组内对话从右到左排序,处理反馈到related_groups (因为sorted_indices本身就是从右到左,从上到下排序后的)
|
126 |
+
# 这个记录的顺序改变,最后sorted_text = [boxInfo['Text'][str(i)] for i in sorted_indices]就可以得到正确的顺序
|
127 |
+
#要保证一个List中的组内有序和组间有序,通常应该先排序组内,然后再保持组间有序
|
128 |
+
def sort_associate_text_list(sorted_indices:list,related_groups:list,boxCoordinates,centers)->list:
    """Order the members of every related group for reading.

    Each group holds rank positions into ``sorted_indices``. A box counts as
    horizontal when its width (corner 2 x minus corner 0 x) is strictly
    greater than its height (corner 2 y minus corner 0 y); otherwise it is
    vertical.

    * If the group contains at least one vertical box, members are sorted by
      centre x, largest first (right to left).
    * If the group contains at least one horizontal box, members are then
      sorted by centre y, smallest first (top to bottom). Both sorts are
      stable, so the second keeps the first as a tie-breaker.

    Args:
        sorted_indices: original box indices in sorted order.
        related_groups: iterable of groups (sets or lists) of rank positions.
        boxCoordinates: box corner lists indexed by original box index.
        centers: ``(x, y)`` centre per original box index.

    Returns:
        A list of lists, one per input group, with members reordered.
    """
    def _is_wide(rank):
        corners = boxCoordinates[sorted_indices[rank]]
        width = corners[2][0] - corners[0][0]
        height = corners[2][1] - corners[0][1]
        return width > height

    def _center_x(rank):
        return centers[sorted_indices[rank]][0]

    def _center_y(rank):
        return centers[sorted_indices[rank]][1]

    result = []
    for members in related_groups:
        ordered = list(members)  # groups may arrive as sets
        wide_flags = [_is_wide(rank) for rank in ordered]
        has_vertical = not all(wide_flags)
        has_horizontal = any(wide_flags)
        if has_vertical:
            ordered = sorted(ordered, key=_center_x, reverse=True)
        if has_horizontal:
            ordered = sorted(ordered, key=_center_y)
        result.append(ordered)
    return result
|
153 |
+
|
154 |
+
|
155 |
+
|
156 |
+
#再对组间对话先上后下,从右到左排序,同时将单独对话加入合适位置.返回排序后的related_groups
|
157 |
+
def sort_dialog_list(sorted_indices:list,related_groups:list,sorted_coordinates)->list:
    """Arrange groups and stand-alone boxes into the final reading order.

    Rank positions ``0 .. len(sorted_indices) - 1`` are walked in ascending
    order. Horizontal boxes (width >= height, measured between corners 0 and
    2 of ``sorted_coordinates[rank]``) are skipped as stand-alone entries.
    When a visited rank belongs to a group that has not been emitted yet, the
    whole group is emitted at that point in its given internal order. A rank
    that belongs to no group is emitted as a single-element list.

    Fixes relative to the previous implementation:
      * every box, including the last one, is tested for being horizontal
        (the old loop stopped one short);
      * ranks are visited in ascending order instead of in the order of the
        *values* stored in ``sorted_indices``, which mixed rank positions
        with original box indices.

    Args:
        sorted_indices: original box indices in sorted order; only its
            length is used here.
        related_groups: list of groups of rank positions, each already
            ordered internally.
        sorted_coordinates: box corner lists aligned with ``sorted_indices``.

    Returns:
        A list of lists of rank positions. The inputs are not modified.
    """
    pending_groups = list(related_groups)
    ordered = []
    placed = set()  # ranks already emitted as part of some group

    for rank in range(len(sorted_indices)):
        corners = sorted_coordinates[rank]
        width = corners[2][0] - corners[0][0]
        height = corners[2][1] - corners[0][1]
        if width >= height:
            # Horizontal boxes are not read on their own.
            continue

        for position, group in enumerate(pending_groups):
            if rank in group:
                ordered.append(list(group))
                placed.update(group)
                del pending_groups[position]
                break
        else:
            # Not in any pending group: emit alone unless an earlier group
            # already covered this rank.
            if rank not in placed:
                ordered.append([rank])

    return ordered
|
204 |
+
|
205 |
+
|
206 |
def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排序后的text list(一列或者几列话,反正是一个框的内容,几句不清楚,一个框的list当一次文案就行) 或者失败请求返回none":
|
207 |
image_bytes = open(image_path, 'rb')
|
208 |
headers = {
|
|
|
230 |
#先json转换,0为坐标list合集,1为 boxid和text合集
|
231 |
boxCoordinates , boxInfo = resp.json()[0],resp.json()[1] #分别是list和dict类型
|
232 |
print("ofa ocr识别漫画块成功返回")
|
233 |
+
print("boxCoordinates是",boxCoordinates)
|
234 |
+
print("boxInfo是",boxInfo)
|
235 |
+
|
236 |
+
boxCoordinates = [[[311, 44], [357, 44], [357, 466], [311, 466]], [[165, 94], [211, 94], [211, 262], [165, 262]],
|
237 |
+
[[1144, 145], [1188, 146], [1188, 303], [1144, 303]],
|
238 |
+
[[1097, 150], [1141, 150], [1141, 380], [1097, 380]],
|
239 |
+
[[135, 320], [177, 320], [177, 558], [135, 558]], [[87, 322], [131, 322], [131, 670], [87, 670]],
|
240 |
+
[[1023, 438], [1073, 438], [1073, 714], [1023, 714]],
|
241 |
+
[[981, 440], [1025, 440], [1025, 678], [981, 678]], [[5, 721], [87, 721], [87, 749], [5, 749]],
|
242 |
+
[[125, 723], [173, 723], [173, 749], [125, 749]],
|
243 |
+
[[175, 723], [201, 723], [201, 749], [175, 749]]]
|
244 |
+
boxInfo = {
|
245 |
+
'Box ID': {'0': '1', '1': '2', '2': '3', '3': '4', '4': '5', '5': '6', '6': '7', '7': '8', '8': '9', '9': '10',
|
246 |
+
'10': '11'},
|
247 |
+
'Text': {'0': '或者是能够实现一切愿望', '1': '再或者:', '2': '只要能够', '3': '解放这个技能', '4': '能够搞明白我', '5': '转生异世界的理由:',
|
248 |
+
'6': '是否能获得如同', '7': '神明的力量|u', '8': 'jmc', '9': 'mm', '10': 'c'}}
|
249 |
+
|
250 |
# 计算文本框的中心点,以便按照从右往左,从上往下的顺序进行排序
|
251 |
centers = [((box[0][0] + box[2][0]) / 2, (box[0][1] + box[2][1]) / 2) for box in boxCoordinates]
|
252 |
+
# 根据每个元组的第一个元素进行降序排序,如果第一个元素相同时,则根据第二个元素进行升序排序。即先关注y后关注x(更重上下)
|
253 |
+
sorted_indices = sorted(range(len(centers)), key=lambda i: ( centers[i][1],-centers[i][0]))
|
254 |
|
255 |
+
# # 即先关注x后关注y(更注重从右到左)
|
256 |
+
# sorted_indices = sorted(range(len(centers)), key=lambda i: ( -centers[i][0],centers[i][1]))
|
257 |
|
258 |
+
# 获取排序后的文本框坐标和对应的文字
|
259 |
sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
|
261 |
+
|
262 |
+
# 调用函数并打印结果
|
263 |
+
associate_text_list,related_groups = find_associate_text(sorted_indices,centers,sorted_coordinates,boxInfo)
|
264 |
+
print("相关list是",associate_text_list)
|
265 |
+
print("related_groups是",related_groups)
|
266 |
+
print("sorted_indices是",sorted_indices)
|
267 |
+
|
268 |
+
related_groups = sort_associate_text_list(sorted_indices,related_groups,boxCoordinates,centers)
|
269 |
+
|
270 |
+
print("组内排序后的related_groups是",related_groups)
|
271 |
+
#[[3, 4], [7, 5, 6], [10, 9], [11, 12, 13], [15, 16, 14]]
|
272 |
+
|
273 |
+
related_groups_in_sorted_indices = []
|
274 |
+
for group in related_groups:
|
275 |
+
related_groups_in_sorted_indices_item = []
|
276 |
+
for idx in group:
|
277 |
+
related_groups_in_sorted_indices_item.append(sorted_indices[idx])# 这里加入的是排序后的索引
|
278 |
+
related_groups_in_sorted_indices.append(related_groups_in_sorted_indices_item)
|
279 |
+
print("related_groups_in_sorted_indices是",related_groups_in_sorted_indices)
|
280 |
+
#related_groups_in_sorted_indices->[[7, 6], [3, 2, 4], [9, 10], [11, 13, 12], [15, 16, 14]]->
|
281 |
+
#期望结果[[0],[3, 2, 4],[1],[5],[7, 6],[8], [9, 10],[11, 13, 12], [15, 16, 14]]
|
282 |
+
|
283 |
+
|
284 |
+
|
285 |
+
|
286 |
+
related_groups = sort_dialog_list(sorted_indices,related_groups,sorted_coordinates)
|
287 |
+
print("related_groups组件排序后是:",related_groups)
|
288 |
+
|
289 |
+
|
290 |
+
|
291 |
+
# 将子列表中的数字提取出来组成一个新的列表(纯数字),去除子项间的[],
|
292 |
+
# 如[[3, 4], [7, 5, 6], [10, 9], [11, 12, 13], [15, 16, 14]] ->[3, 4, 7, 5, 6, 10, 9, 11, 12, 13, 15, 16, 14]
|
293 |
+
flattened_list = [num for sublist in related_groups for num in sublist]
|
294 |
+
added_indices = set()
|
295 |
+
sorted_text = []
|
296 |
+
for i in flattened_list:
|
297 |
+
for sublist in related_groups:
|
298 |
+
if i in sublist:
|
299 |
+
if i == sublist[-1] and i not in added_indices:
|
300 |
+
sorted_text.append(boxInfo['Text'][str(sorted_indices[i])] + '\n')
|
301 |
+
added_indices.add(i)
|
302 |
+
elif i not in added_indices:
|
303 |
+
sorted_text.append(boxInfo['Text'][str(sorted_indices[i])])
|
304 |
+
added_indices.add(i)
|
305 |
+
|
306 |
+
print("不完整的sorted_text是",sorted_text)
|
307 |
+
|
308 |
+
|
309 |
+
# 不用在最后一个项末尾添加"\n",从而隔开其他的漫画块对话(因为总会有最后一个子块,因而上述方式就可以加上了)
|
310 |
sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
|
311 |
print(sorted_coordinates)
|
312 |
print(sorted_text)
|
|
|
317 |
return None
|
318 |
|
319 |
|
320 |
+
|
321 |
#通过文字获取音频
|
322 |
def get_audio_data(text:str)-> "返回audio data io句柄, duration(也有可能包含无效字符导致生成音频400错误)":
|
323 |
# Creates an instance of a speech config with specified subscription key and service region.
|