Spaces:

rogerxavier
/

moviepy_with_manga_test

Running

App Files Community

rogerxavier committed on May 28

Commit

b8202f6

•

1 Parent(s): 8654962

Update 3mergeDialogToVideo.py

Browse files

Files changed (1) hide show

3mergeDialogToVideo.py +37 -28

3mergeDialogToVideo.py CHANGED Viewed

@@ -13,7 +13,6 @@ import time
 from moviepy.audio.AudioClip import AudioArrayClip
 from moviepy.editor import *
 import cv2
-import azure.cognitiveservices.speech as speechsdk
 import numpy as np
 import io
 import base64
@@ -25,7 +24,7 @@ import os
 azure_speech_key = os.getenv('azure_speech_key')
 azure_service_region = os.getenv('azure_service_region')
 my_openai_key = os.getenv('my_openai_key')
 print("azure key是",azure_speech_key)
 print("azure_service_region是",azure_service_region)
 print("my_openai_key",my_openai_key)
@@ -123,32 +122,42 @@ def get_audio_data(text:str)-> "返回audio data io句柄, duration":
     speech_key = azure_speech_key
     service_region = azure_service_region
-    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
-    # Note: the voice setting will not overwrite the voice element in input SSML.
-    speech_config.speech_synthesis_voice_name = "zh-CN-YunxiNeural" ##云希
-    # use the default speaker as audio output.
-    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
-    result = speech_synthesizer.speak_text_async(text).get()
-    # Check result
-    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
-        print("Speech synthesized for text [{}]".format(text))
-    elif result.reason == speechsdk.ResultReason.Canceled:
-        cancellation_details = result.cancellation_details
-        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
-        if cancellation_details.reason == speechsdk.CancellationReason.Error:
-            print("Error details: {}".format(cancellation_details.error_details))
-    # print("音频持续时间是",result.audio_duration)
-    # print("音频数据是",result.audio_data)
-    # 创建临时文件 -当前路径下面
-    with tempfile.NamedTemporaryFile(dir='/',delete=False) as temp_file:
-        temp_file.write(result.audio_data)
-        temp_file.close()
-        # 在这里完成您对文件的操作，比如返回文件名
-        file_name = temp_file.name
-    return file_name, str(result.audio_duration)
 # 补零函数，将数字部分补齐为指定长度

 from moviepy.audio.AudioClip import AudioArrayClip
 from moviepy.editor import *
 import cv2
 import numpy as np
 import io
 import base64
 azure_speech_key = os.getenv('azure_speech_key')
 azure_service_region = os.getenv('azure_service_region')
 my_openai_key = os.getenv('my_openai_key')
+speech_synthesis_voice_name = "zh-CN-YunhaoNeural"  ##云皓
 print("azure key是",azure_speech_key)
 print("azure_service_region是",azure_service_region)
 print("my_openai_key",my_openai_key)
     speech_key = azure_speech_key
     service_region = azure_service_region
+    voiceText = text
+    url = f"https://{service_region}.tts.speech.microsoft.com/cognitiveservices/v1"
+    headers = {
+        "Ocp-Apim-Subscription-Key": speech_key,
+        "Content-Type": "application/ssml+xml",
+        "X-Microsoft-OutputFormat": "audio-16khz-128kbitrate-mono-mp3",
+        "User-Agent": "curl"
+    }
+    ssml_text = '''
+    <speak version='1.0' xml:lang='zh-CN'>
+        <voice xml:lang='zh-CN' xml:gender='male' name='{voiceName}'>
+            {voiceText}
+        </voice>
+    </speak>
+    '''.format(voiceName=speech_synthesis_voice_name,voiceText = voiceText)
+    response = requests.post(url, headers=headers, data=ssml_text.encode('utf-8'))
+    if response.status_code == 200:
+        # print("音频持续时间是",response.audio_duration)
+        # print("音频数据是",response.content)
+        # 创建临时文件 -当前路径下面
+        with tempfile.NamedTemporaryFile(dir='/',delete=False) as temp_file:
+            temp_file.write(response.content)
+            temp_file.close()
+            # 在这里完成您对文件的操作，比如返回文件名
+            file_name = temp_file.name
+        return file_name, str(response.audio_duration)
+    else:
+        print("Error: Failed to synthesize audio. Status code:", response.status_code)
 # 补零函数，将数字部分补齐为指定长度