crowbarmassage committed
Commit · 44f6e5d
Parent(s): 147fb27
Upload app.py
app.py CHANGED
@@ -8,12 +8,6 @@ Original file is located at
 """
 
 # Beginning of Unit 7
-#!pip install git+https://github.com/huggingface/transformers.git
-!pip install torch accelerate torchaudio datasets gradio sentencepiece
-!pip install -U transformers
-#!pip install sacremoses
-#!pip install -Uqq datasets[audio]
-#!pip install git+https://github.com/huggingface/transformers
 
 from transformers.models.markuplm.tokenization_markuplm import MARKUPLM_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING
 import torch, torchaudio
@@ -85,7 +79,7 @@ def speech_to_speech_translation(audio):
     synthesised_speech = synthesise(translated_text)
     return 16000, synthesised_speech
 
-def adjusted_speech_to_speech_translation(audio_filepath):
+def speech_to_speech_translation(audio_filepath):
     # Load the audio file
     waveform, sampling_rate = torchaudio.load(audio_filepath)
 
@@ -100,12 +94,11 @@ def adjusted_speech_to_speech_translation(audio_filepath):
             "sampling_rate": sampling_rate
         }
     }
-
     transcribed_text = transcribe(audio_dict)
     translated_text = translate(transcribed_text)
+    synthesised_speech = synthesise(translated_text)
     #print(transcribed_text)
     #print(translated_text)
-    synthesised_speech = synthesise(translated_text)
     #print(synthesised_speech)
     #print(torch.min(synthesised_speech), torch.max(synthesised_speech))
     synthesised_speech = (synthesised_speech * 32767).numpy().astype(np.int16)
@@ -118,13 +111,13 @@ import gradio as gr
 demo = gr.Blocks()
 
 mic_translate = gr.Interface(
-    fn=adjusted_speech_to_speech_translation,
+    fn=speech_to_speech_translation,
     inputs=gr.Audio(source="microphone", type="filepath"),
     outputs=gr.Audio(label="Generated Speech", type="numpy"),
 )
 
 file_translate = gr.Interface(
-    fn=adjusted_speech_to_speech_translation,
+    fn=speech_to_speech_translation,
     inputs=gr.Audio(source="upload", type="filepath"),
     outputs=gr.Audio(label="Generated Speech", type="numpy"),
 )
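
For readers following the diff: the first hunk strips the !pip install lines, which are notebook magics left over from the Colab original ("Original file is located at ...") and are invalid syntax in a plain Python script; a Space pulls its dependencies from requirements.txt instead. The remaining hunks rename adjusted_speech_to_speech_translation to speech_to_speech_translation and move the synthesise call ahead of the commented-out debug prints. Below is a minimal sketch of the cascaded pipeline as it plausibly reads after this commit; the stub bodies for transcribe/translate/synthesise, the "raw" dict key, and the final return line are assumptions, since the real wrappers are defined outside the diff context.

import numpy as np
import torch
import torchaudio

def transcribe(audio_dict):
    # Stand-in for the Space's ASR wrapper (defined earlier in app.py).
    return "hello world"

def translate(text):
    # Stand-in for the Space's text-to-text translation wrapper.
    return "bonjour le monde"

def synthesise(text):
    # Stand-in for the Space's TTS wrapper; returns a float waveform in [-1, 1].
    return torch.zeros(16000)

def speech_to_speech_translation(audio_filepath):
    # Gradio passes a path because the inputs use type="filepath".
    waveform, sampling_rate = torchaudio.load(audio_filepath)

    # The diff only shows the "sampling_rate" key and two closing braces;
    # the flat layout and "raw" key here follow the transformers ASR
    # pipeline input format and are an assumption.
    audio_dict = {
        "raw": waveform.squeeze().numpy(),
        "sampling_rate": sampling_rate,
    }

    transcribed_text = transcribe(audio_dict)
    translated_text = translate(transcribed_text)
    synthesised_speech = synthesise(translated_text)

    # Scale the float waveform to 16-bit PCM so gr.Audio(type="numpy")
    # can play it; the (rate, array) return shape matches the sibling
    # function visible in the diff context.
    synthesised_speech = (synthesised_speech * 32767).numpy().astype(np.int16)
    return 16000, synthesised_speech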
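
The visible context creates demo = gr.Blocks() and two gr.Interface objects but never connects them. A plausible wiring, using the Gradio 3.x API that the source= keyword implies; the TabbedInterface and launch() calls are assumptions, not shown in the diff.

import gradio as gr

demo = gr.Blocks()

mic_translate = gr.Interface(
    fn=speech_to_speech_translation,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Audio(label="Generated Speech", type="numpy"),
)

file_translate = gr.Interface(
    fn=speech_to_speech_translation,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Audio(label="Generated Speech", type="numpy"),
)

# Assumed: tab the two interfaces together inside the Blocks and launch.
with demo:
    gr.TabbedInterface([mic_translate, file_translate], ["Microphone", "Audio File"])

demo.launch()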
|