rogerxavier
committed on
Commit
•
b8202f6
1
Parent(s):
8654962
Update 3mergeDialogToVideo.py
Browse files- 3mergeDialogToVideo.py +37 -28
3mergeDialogToVideo.py
CHANGED
@@ -13,7 +13,6 @@ import time
|
|
13 |
from moviepy.audio.AudioClip import AudioArrayClip
|
14 |
from moviepy.editor import *
|
15 |
import cv2
|
16 |
-
import azure.cognitiveservices.speech as speechsdk
|
17 |
import numpy as np
|
18 |
import io
|
19 |
import base64
|
@@ -25,7 +24,7 @@ import os
|
|
25 |
azure_speech_key = os.getenv('azure_speech_key')
|
26 |
azure_service_region = os.getenv('azure_service_region')
|
27 |
my_openai_key = os.getenv('my_openai_key')
|
28 |
-
|
29 |
print("azure key是",azure_speech_key)
|
30 |
print("azure_service_region是",azure_service_region)
|
31 |
print("my_openai_key",my_openai_key)
|
@@ -123,32 +122,42 @@ def get_audio_data(text:str)-> "返回audio data io句柄, duration":
|
|
123 |
speech_key = azure_speech_key
|
124 |
service_region = azure_service_region
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
#
|
150 |
-
|
151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
|
154 |
# 补零函数,将数字部分补齐为指定长度
|
|
|
13 |
from moviepy.audio.AudioClip import AudioArrayClip
|
14 |
from moviepy.editor import *
|
15 |
import cv2
|
|
|
16 |
import numpy as np
|
17 |
import io
|
18 |
import base64
|
|
|
24 |
azure_speech_key = os.getenv('azure_speech_key')
|
25 |
azure_service_region = os.getenv('azure_service_region')
|
26 |
my_openai_key = os.getenv('my_openai_key')
|
27 |
+
speech_synthesis_voice_name = "zh-CN-YunhaoNeural" ##云皓
|
28 |
print("azure key是",azure_speech_key)
|
29 |
print("azure_service_region是",azure_service_region)
|
30 |
print("my_openai_key",my_openai_key)
|
|
|
122 |
speech_key = azure_speech_key
|
123 |
service_region = azure_service_region
|
124 |
|
125 |
+
voiceText = text
|
126 |
+
url = f"https://{service_region}.tts.speech.microsoft.com/cognitiveservices/v1"
|
127 |
+
|
128 |
+
headers = {
|
129 |
+
"Ocp-Apim-Subscription-Key": speech_key,
|
130 |
+
"Content-Type": "application/ssml+xml",
|
131 |
+
"X-Microsoft-OutputFormat": "audio-16khz-128kbitrate-mono-mp3",
|
132 |
+
"User-Agent": "curl"
|
133 |
+
}
|
134 |
+
|
135 |
+
ssml_text = '''
|
136 |
+
<speak version='1.0' xml:lang='zh-CN'>
|
137 |
+
<voice xml:lang='zh-CN' xml:gender='male' name='{voiceName}'>
|
138 |
+
{voiceText}
|
139 |
+
</voice>
|
140 |
+
</speak>
|
141 |
+
'''.format(voiceName=speech_synthesis_voice_name,voiceText = voiceText)
|
142 |
+
|
143 |
+
response = requests.post(url, headers=headers, data=ssml_text.encode('utf-8'))
|
144 |
+
|
145 |
+
if response.status_code == 200:
|
146 |
+
# print("音频持续时间是",response.audio_duration)
|
147 |
+
# print("音频数据是",response.content)
|
148 |
+
# 创建临时文件 -当前路径下面
|
149 |
+
with tempfile.NamedTemporaryFile(dir='/',delete=False) as temp_file:
|
150 |
+
temp_file.write(response.content)
|
151 |
+
temp_file.close()
|
152 |
+
# 在这里完成您对文件的操作,比如返回文件名
|
153 |
+
file_name = temp_file.name
|
154 |
+
return file_name, str(response.audio_duration)
|
155 |
+
|
156 |
+
else:
|
157 |
+
print("Error: Failed to synthesize audio. Status code:", response.status_code)
|
158 |
+
|
159 |
+
|
160 |
+
|
161 |
|
162 |
|
163 |
# 补零函数,将数字部分补齐为指定长度
|