Spaces:

rogerxavier
/

moviepy_with_manga_test

Running

App Files Files Community

rogerxavier committed on May 31

Commit

3b99f69

•

1 Parent(s): a046690

Update 3mergeDialogToVideo.py

Browse files

Files changed (1) hide show

3mergeDialogToVideo.py +12 -2

3mergeDialogToVideo.py CHANGED Viewed

@@ -9,7 +9,7 @@ import requests
 import tempfile
 import time
 from moviepy.audio.AudioClip import AudioArrayClip
 from moviepy.editor import *
 import cv2
@@ -34,6 +34,15 @@ print("my_openai_key",my_openai_key)
 #通过去水印完整漫画图片->获取相应的对话框图片->获取对话框文字->返回对话框文字
 def get_image_copywrite(image_path:"图片路径(包含后缀)",dialog_cut_path:"对话框切割路径")->"返回漫画关联对话框识别后得到的文案str(原文即可),也可能是none":
     dialog_texts = ''
     associate_dialog_img = get_associate_dialog(image_path=image_path,dialog_cut_path=dialog_cut_path)
     if len(associate_dialog_img)!=0:
@@ -42,7 +51,8 @@ def get_image_copywrite(image_path:"图片路径(包含后缀)",dialog_cut_path:
             cur_dialog_texts = get_sorted_dialog_text(dialog_img_path)#一个对话框的文字list
             if cur_dialog_texts is not None:
                 for dialog_text in cur_dialog_texts:
-                    dialog_texts += dialog_text
                     #因为已经在数组中加入了\n 换行,这里就不用加了
             else:
                 print(dialog_img_path+"识别是空-可能是有问题")

 import tempfile
 import time
+import re #正则对话剔除非中文,保留'\n'
 from moviepy.audio.AudioClip import AudioArrayClip
 from moviepy.editor import *
 import cv2
 #通过去水印完整漫画图片->获取相应的对话框图片->获取对话框文字->返回对话框文字
 def get_image_copywrite(image_path:"图片路径(包含后缀)",dialog_cut_path:"对话框切割路径")->"返回漫画关联对话框识别后得到的文案str(原文即可),也可能是none":
+    def extract_chinese(text:str)->str:
+        # Return `text` with every character removed except '\n' and characters in U+4E00-U+9FA5.
+        chinese_pattern = re.compile("[\u4e00-\u9fa5]+")  # regex matching characters in the range U+4E00-U+9FA5
+        chinese_text = ""
+        for char in text:
+            if char == '\n' or re.match(chinese_pattern, char):  # each character is tested on its own
+                chinese_text += char
+        return chinese_text
     dialog_texts = ''
     associate_dialog_img = get_associate_dialog(image_path=image_path,dialog_cut_path=dialog_cut_path)
     if len(associate_dialog_img)!=0:
             cur_dialog_texts = get_sorted_dialog_text(dialog_img_path)#一个对话框的文字list
             if cur_dialog_texts is not None:
                 for dialog_text in cur_dialog_texts:
+                    # dialog_texts += dialog_text
+                    dialog_texts += extract_chinese(dialog_text)
                     #因为已经在数组中加入了\n 换行,这里就不用加了
             else:
                 print(dialog_img_path+"识别是空-可能是有问题")