Spaces:

rogerxavier
/

moviepy_with_manga_test

Sleeping

App Files Files Community

rogerxavier committed on Jun 2, 2024

Commit

371ebaa

verified ·

1 Parent(s): e8bc308

Update 3mergeDialogToVideo.py

Browse files

Files changed (1) hide show

3mergeDialogToVideo.py +202 -23

3mergeDialogToVideo.py CHANGED Viewed

@@ -73,7 +73,136 @@ def get_associate_dialog(image_path:"图片路径(包含后缀)",dialog_cut_path
     return associated_dialogs
-#通过对话框图片路径,获取对话框文字list
 def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排序后的text list(一列或者几列话,反正是一个框的内容,几句不清楚,一个框的list当一次文案就行)  或者失败请求返回none":
     image_bytes = open(image_path, 'rb')
     headers = {
@@ -101,34 +230,83 @@ def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排
         #先json转换，0为坐标list合集,1为 boxid和text合集
         boxCoordinates , boxInfo = resp.json()[0],resp.json()[1] #分别是list和dict类型
         print("ofa ocr识别漫画块成功返回")
         # 计算文本框的中心点，以便按照从右往左，从上往下的顺序进行排序
         centers = [((box[0][0] + box[2][0]) / 2, (box[0][1] + box[2][1]) / 2) for box in boxCoordinates]
-        # 按照中心点的坐标从右往左，从上往下的顺序对文本框坐标进行排序
-        sorted_indices = sorted(range(len(centers)), key=lambda i: (-centers[i][0], centers[i][1]))
-       # 获取排序后的文本框坐标和对应的文字
         sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
-        sorted_text = [boxInfo['Text'][str(i)] for i in sorted_indices]
-        # 根据x方向偏差对小于1/3宽度之间的不同文本框进行重排
-        for i in range(len(sorted_indices) - 1):
-            if centers[sorted_indices[i]][0] - centers[sorted_indices[i+1]][0] < (sorted_coordinates[i][2][0] - sorted_coordinates[i][0][0]) / 3:
-                if sorted_coordinates[i][0][1] > sorted_coordinates[i+1][2][1]:
-                    #if这里看y轴了
-                    sorted_indices[i], sorted_indices[i+1] = sorted_indices[i+1], sorted_indices[i]
-        # 根据x方向和文本框宽度对大于一个标准宽度之间的不同文本框断句
-        for i in range(len(sorted_indices) - 1):
-            if centers[sorted_indices[i]][0] - centers[sorted_indices[i + 1]][0] > (
-                    sorted_coordinates[i][2][0] - sorted_coordinates[i][0][0]) * 1.5:
-                # 如果相邻文本框的横坐标距离大于一个标准宽度的2/3，进行断句
-                sorted_text[i] += '\n'
-        # 在最后一个项末尾添加"\n",从而隔开其他的漫画块对话
-        sorted_text[-1] += '\n'
-        # 在最后一个项末尾添加"\n",从而隔开其他的漫画块对话
         sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
         print(sorted_coordinates)
         print(sorted_text)
@@ -139,6 +317,7 @@ def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排
         return None
 #通过文字获取音频
 def get_audio_data(text:str)-> "返回audio data io句柄, duration(也有可能包含无效字符导致生成音频400错误)":
     # Creates an instance of a speech config with specified subscription key and service region.

     return associated_dialogs
def merge_sublists(lists):
    """Merge sublists that share at least one element.

    Sublists are processed in order. A sublist that overlaps one or more
    groups built so far is folded into the first of them, and every other
    overlapping group is folded into that same group as well, so the
    returned groups never share an element through a later "bridge" sublist.

    Args:
        lists: iterable of lists whose elements support ``==`` comparison.

    Returns:
        A new list of new lists. The input sublists are never mutated.
        Element order inside a group is first-seen order; elements already
        present in a group are not added twice when merging.
    """
    merged = []
    for sublist in lists:
        overlapping = [
            group for group in merged
            if any(elem in sublist for elem in group)
        ]
        if not overlapping:
            # Copy so later merges never write into the caller's list.
            merged.append(list(sublist))
            continue

        target = overlapping[0]
        for elem in sublist:
            if elem not in target:
                target.append(elem)

        # The sublist may bridge several existing groups: fuse them all.
        for other in overlapping[1:]:
            for elem in other:
                if elem not in target:
                    target.append(elem)
            # Drop the absorbed group by identity, not by equality.
            merged = [group for group in merged if group is not other]
    return merged
+# 任意两框进行中心高度差和中心宽度差比较,如果xy都相近,那么认为是同一个框的对话,加入一个对话数组里面,
+# 最终将漫画块分成几个对话框数组,然后再对数组间进行从上到下,从右到左排序
+# 定义一个函数来寻找相关的点并加入新的list
def find_associate_text(sorted_indices, centers, sorted_coordinates, boxInfo):
    """Group text boxes that sit close enough to belong to one speech bubble.

    Two boxes at positions ``i < j`` of ``sorted_indices`` are related when
    their centre distance is below one third of box ``i``'s height on the
    y axis and below 1.5 times box ``i``'s width on the x axis.

    Args:
        sorted_indices: original box indices in sorted order.
        centers: ``(x, y)`` centre per box, indexed by original box index.
        sorted_coordinates: box corner lists indexed by position in
            ``sorted_indices``; corner 0 and corner 2 are used as opposite
            corners.
        boxInfo: dict whose ``'Text'`` entry maps ``str(original index)`` to
            the recognised text.

    Returns:
        A tuple ``(associate_text_list, related_groups)``:
        ``related_groups`` is a list of disjoint sets of positions (not
        original indices); ``associate_text_list`` holds the texts of each
        group, in ascending position order, passed through
        ``merge_sublists``.
    """
    related_groups = []
    count = len(sorted_indices)
    for i in range(count - 1):
        box_i = sorted_coordinates[i]
        # Thresholds are derived from the size of box i only.
        max_dy = abs(box_i[2][1] - box_i[0][1]) / 3
        max_dx = abs(box_i[2][0] - box_i[0][0]) * 1.5
        center_i = centers[sorted_indices[i]]
        for j in range(i + 1, count):
            center_j = centers[sorted_indices[j]]
            if abs(center_i[1] - center_j[1]) >= max_dy:
                continue
            if abs(center_i[0] - center_j[0]) >= max_dx:
                continue

            touching = [g for g in related_groups if i in g or j in g]
            if not touching:
                related_groups.append({i, j})
                continue

            # Extend the first matching group and absorb every other group
            # that contains i or j, so groups stay disjoint.
            target = touching[0]
            target.update((i, j))
            for other in touching[1:]:
                target.update(other)
                related_groups = [g for g in related_groups if g is not other]

    # sorted() makes the text order inside a group follow position order
    # instead of set iteration order.
    associate_text_list = [
        [boxInfo['Text'][str(sorted_indices[idx])] for idx in sorted(group)]
        for group in related_groups
    ]
    return merge_sublists(associate_text_list), related_groups
+ #先对组内对话从右到左排序,处理反馈到related_groups  (因为sorted_indices本身就是从右到左,从上到下排序后的)
+# 这个记录的顺序改变,最后sorted_text = [boxInfo['Text'][str(i)] for i in sorted_indices]就可以得到正确的顺序
+#要保证一个List中的组内有序和组间有序，通常应该先排序组内，然后再保持组间有序
def sort_associate_text_list(sorted_indices: list, related_groups: list, boxCoordinates, centers) -> list:
    """Order the members of every related group.

    Members of a group are positions into ``sorted_indices``. If the group
    contains at least one box that is not wider than tall, members are
    sorted by centre x, largest first. If it contains at least one box that
    is wider than tall, members are then sorted by centre y, smallest first;
    for a group containing both kinds this second sort decides the primary
    order.

    Args:
        sorted_indices: original box indices in sorted order.
        related_groups: iterable of groups (sets or lists) of positions.
        boxCoordinates: box corner lists indexed by original box index;
            corner 0 and corner 2 are used as opposite corners.
        centers: ``(x, y)`` centre per box, indexed by original box index.

    Returns:
        A list of lists of positions, one per input group, in input order.
    """
    sorted_groups = []
    for group in related_groups:
        # Start from ascending positions so that boxes whose centres tie keep
        # their position order rather than set iteration order.
        members = sorted(group)

        has_vertical = False
        has_horizontal = False
        for idx in members:
            box = boxCoordinates[sorted_indices[idx]]
            width = box[2][0] - box[0][0]
            height = box[2][1] - box[0][1]
            if width > height:
                has_horizontal = True
            else:
                has_vertical = True

        # list.sort is stable (also with reverse=True), so ties keep the
        # order established above.
        if has_vertical:
            members.sort(key=lambda idx: centers[sorted_indices[idx]][0], reverse=True)
        if has_horizontal:
            members.sort(key=lambda idx: centers[sorted_indices[idx]][1])
        sorted_groups.append(members)
    return sorted_groups
+#再对组间对话先上后下,从右到左排序,同时将单独对话加入合适位置.返回排序后的related_groups
def sort_dialog_list(sorted_indices: list, related_groups: list, sorted_coordinates) -> list:
    """Arrange groups and stand-alone boxes into one ordered list.

    Positions ``0 .. len(sorted_indices) - 1`` are walked in order. A box
    that is not taller than wide is skipped. The first time a position that
    belongs to a group is met, the whole group is emitted at that point; a
    position that belongs to no group is emitted as a one-element list.
    A group is therefore emitted only if at least one of its members is a
    tall box.

    All values handled here are positions into ``sorted_indices`` /
    ``sorted_coordinates`` (the same space ``related_groups`` uses), never
    original box indices.

    Args:
        sorted_indices: original box indices in sorted order; only its length
            is used.
        related_groups: groups (lists or sets) of positions, each already in
            its internal reading order.
        sorted_coordinates: box corner lists indexed by position; corner 0
            and corner 2 are used as opposite corners.

    Returns:
        A new list of lists of positions. ``related_groups`` is not mutated.
    """
    remaining = list(related_groups)
    emitted = set()
    ordered = []
    for pos in range(len(sorted_indices)):
        box = sorted_coordinates[pos]
        width = box[2][0] - box[0][0]
        height = box[2][1] - box[0][1]
        if width >= height:
            # Only boxes taller than wide start an entry; others are dropped.
            continue
        if pos in emitted:
            # Already placed as part of a group emitted earlier.
            continue

        members = [pos]
        for k, group in enumerate(remaining):
            if pos in group:
                members = list(group)
                del remaining[k]
                break
        ordered.append(members)
        emitted.update(members)
    return ordered
 def get_sorted_dialog_text(image_path:"包含后缀的文件路径")->"返回排序后的text list(一列或者几列话,反正是一个框的内容,几句不清楚,一个框的list当一次文案就行)  或者失败请求返回none":
     image_bytes = open(image_path, 'rb')
     headers = {
         #先json转换，0为坐标list合集,1为 boxid和text合集
         boxCoordinates , boxInfo = resp.json()[0],resp.json()[1] #分别是list和dict类型
         print("ofa ocr识别漫画块成功返回")
+        print("boxCoordinates是",boxCoordinates)
+        print("boxInfo是",boxInfo)
+        boxCoordinates = [[[311, 44], [357, 44], [357, 466], [311, 466]], [[165, 94], [211, 94], [211, 262], [165, 262]],
+                          [[1144, 145], [1188, 146], [1188, 303], [1144, 303]],
+                          [[1097, 150], [1141, 150], [1141, 380], [1097, 380]],
+                          [[135, 320], [177, 320], [177, 558], [135, 558]], [[87, 322], [131, 322], [131, 670], [87, 670]],
+                          [[1023, 438], [1073, 438], [1073, 714], [1023, 714]],
+                          [[981, 440], [1025, 440], [1025, 678], [981, 678]], [[5, 721], [87, 721], [87, 749], [5, 749]],
+                          [[125, 723], [173, 723], [173, 749], [125, 749]],
+                          [[175, 723], [201, 723], [201, 749], [175, 749]]]
+        boxInfo = {
+            'Box ID': {'0': '1', '1': '2', '2': '3', '3': '4', '4': '5', '5': '6', '6': '7', '7': '8', '8': '9', '9': '10',
+                       '10': '11'},
+            'Text': {'0': '或者是能够实现一切愿望', '1': '再或者:', '2': '只要能够', '3': '解放这个技能', '4': '能够搞明白我', '5': '转生异世界的理由:',
+                     '6': '是否能获得如同', '7': '神明的力量|u', '8': 'jmc', '9': 'mm', '10': 'c'}}
         # 计算文本框的中心点，以便按照从右往左，从上往下的顺序进行排序
         centers = [((box[0][0] + box[2][0]) / 2, (box[0][1] + box[2][1]) / 2) for box in boxCoordinates]
+        # 根据每个元组的第一个元素进行降序排序，如果第一个元素相同时，则根据第二个元素进行升序排序。即先关注y后关注x(更重上下)
+        sorted_indices = sorted(range(len(centers)), key=lambda i: ( centers[i][1],-centers[i][0]))
+        # # 即先关注x后关注y(更注重从右到左)
+        # sorted_indices = sorted(range(len(centers)), key=lambda i: ( -centers[i][0],centers[i][1]))
+        # 获取排序后的文本框坐标和对应的文字
         sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
+        # 调用函数并打印结果
+        associate_text_list,related_groups = find_associate_text(sorted_indices,centers,sorted_coordinates,boxInfo)
+        print("相关list是",associate_text_list)
+        print("related_groups是",related_groups)
+        print("sorted_indices是",sorted_indices)
+        related_groups = sort_associate_text_list(sorted_indices,related_groups,boxCoordinates,centers)
+        print("组内排序后的related_groups是",related_groups)
+        #[[3, 4], [7, 5, 6], [10, 9], [11, 12, 13], [15, 16, 14]]
+        related_groups_in_sorted_indices = []
+        for group in related_groups:
+            related_groups_in_sorted_indices_item = []
+            for idx in group:
+                related_groups_in_sorted_indices_item.append(sorted_indices[idx])# 这里加入的是排序后的索引
+            related_groups_in_sorted_indices.append(related_groups_in_sorted_indices_item)
+        print("related_groups_in_sorted_indices是",related_groups_in_sorted_indices)
+        #related_groups_in_sorted_indices->[[7, 6], [3, 2, 4], [9, 10], [11, 13, 12], [15, 16, 14]]->
+        #期望结果[[0],[3, 2, 4],[1],[5],[7, 6],[8], [9, 10],[11, 13, 12], [15, 16, 14]]
+        related_groups = sort_dialog_list(sorted_indices,related_groups,sorted_coordinates)
+        print("related_groups组件排序后是:",related_groups)
+        # 将子列表中的数字提取出来组成一个新的列表(纯数字),去除子项间的[]，
+        # 如[[3, 4], [7, 5, 6], [10, 9], [11, 12, 13], [15, 16, 14]] ->[3, 4, 7, 5, 6, 10, 9, 11, 12, 13, 15, 16, 14]
+        flattened_list = [num for sublist in related_groups for num in sublist]
+        added_indices = set()
+        sorted_text = []
+        for i in flattened_list:
+            for sublist in related_groups:
+                if i in sublist:
+                    if i == sublist[-1] and i not in added_indices:
+                        sorted_text.append(boxInfo['Text'][str(sorted_indices[i])] + '\n')
+                        added_indices.add(i)
+                    elif i not in added_indices:
+                        sorted_text.append(boxInfo['Text'][str(sorted_indices[i])])
+                        added_indices.add(i)
+        print("不完整的sorted_text是",sorted_text)
+        # 不用在最后一个项末尾添加"\n",从而隔开其他的漫画块对话(因为总会有最后一个子块,因而上述方式就可以加上了)
         sorted_coordinates = [boxCoordinates[i] for i in sorted_indices]
         print(sorted_coordinates)
         print(sorted_text)
         return None
 #通过文字获取音频
 def get_audio_data(text:str)-> "返回audio data io句柄, duration(也有可能包含无效字符导致生成音频400错误)":
     # Creates an instance of a speech config with specified subscription key and service region.