crowbarmassage committed
Commit · 44f6e5d
Parent(s): 147fb27
Upload app.py
app.py CHANGED
@@ -8,12 +8,6 @@ Original file is located at
 """
 
 # Beginning of Unit 7
-#!pip install git+https://github.com/huggingface/transformers.git
-!pip install torch accelerate torchaudio datasets gradio sentencepiece
-!pip install -U transformers
-#!pip install sacremoses
-#!pip install -Uqq datasets[audio]
-#!pip install git+https://github.com/huggingface/transformers
 
 from transformers.models.markuplm.tokenization_markuplm import MARKUPLM_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING
 import torch, torchaudio
@@ -85,7 +79,7 @@ def speech_to_speech_translation(audio):
     synthesised_speech = synthesise(translated_text)
     return 16000, synthesised_speech
 
-def adjusted_speech_to_speech_translation(audio_filepath):
+def speech_to_speech_translation(audio_filepath):
     # Load the audio file
     waveform, sampling_rate = torchaudio.load(audio_filepath)
 
@@ -100,12 +94,11 @@ def adjusted_speech_to_speech_translation(audio_filepath):
             "sampling_rate": sampling_rate
         }
     }
-
     transcribed_text = transcribe(audio_dict)
     translated_text = translate(transcribed_text)
+    synthesised_speech = synthesise(translated_text)
     #print(transcribed_text)
     #print(translated_text)
-    synthesised_speech = synthesise(translated_text)
     #print(synthesised_speech)
     #print(torch.min(synthesised_speech), torch.max(synthesised_speech))
     synthesised_speech = (synthesised_speech * 32767).numpy().astype(np.int16)
@@ -118,13 +111,13 @@ import gradio as gr
 demo = gr.Blocks()
 
 mic_translate = gr.Interface(
-    fn=adjusted_speech_to_speech_translation,
+    fn=speech_to_speech_translation,
     inputs=gr.Audio(source="microphone", type="filepath"),
     outputs=gr.Audio(label="Generated Speech", type="numpy"),
 )
 
 file_translate = gr.Interface(
-    fn=adjusted_speech_to_speech_translation,
+    fn=speech_to_speech_translation,
     inputs=gr.Audio(source="upload", type="filepath"),
     outputs=gr.Audio(label="Generated Speech", type="numpy"),
 )
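
For readers following the diff: the first hunk strips the !pip install lines, which are notebook magics left over from the Colab original ("Original file is located at ...") and are invalid syntax in a plain Python script; a Space pulls its dependencies from requirements.txt instead. The remaining hunks rename adjusted_speech_to_speech_translation to speech_to_speech_translation and move the synthesise call ahead of the commented-out debug prints. Below is a minimal sketch of the cascaded pipeline as it plausibly reads after this commit; the stub bodies for transcribe/translate/synthesise, the "raw" dict key, and the final return line are assumptions, since the real wrappers are defined outside the diff context.

import numpy as np
import torch
import torchaudio

def transcribe(audio_dict):
    # Stand-in for the Space's ASR wrapper (defined earlier in app.py).
    return "hello world"

def translate(text):
    # Stand-in for the Space's text-to-text translation wrapper.
    return "bonjour le monde"

def synthesise(text):
    # Stand-in for the Space's TTS wrapper; returns a float waveform in [-1, 1].
    return torch.zeros(16000)

def speech_to_speech_translation(audio_filepath):
    # Gradio passes a path because the inputs use type="filepath".
    waveform, sampling_rate = torchaudio.load(audio_filepath)

    # The diff only shows the "sampling_rate" key and two closing braces;
    # the flat layout and "raw" key here follow the transformers ASR
    # pipeline input format and are an assumption.
    audio_dict = {
        "raw": waveform.squeeze().numpy(),
        "sampling_rate": sampling_rate,
    }

    transcribed_text = transcribe(audio_dict)
    translated_text = translate(transcribed_text)
    synthesised_speech = synthesise(translated_text)

    # Scale the float waveform to 16-bit PCM so gr.Audio(type="numpy")
    # can play it; the (rate, array) return shape matches the sibling
    # function visible in the diff context.
    synthesised_speech = (synthesised_speech * 32767).numpy().astype(np.int16)
    return 16000, synthesised_speech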
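
The visible context creates demo = gr.Blocks() and two gr.Interface objects but never connects them. A plausible wiring, using the Gradio 3.x API that the source= keyword implies; the TabbedInterface and launch() calls are assumptions, not shown in the diff.

import gradio as gr

demo = gr.Blocks()

mic_translate = gr.Interface(
    fn=speech_to_speech_translation,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Audio(label="Generated Speech", type="numpy"),
)

file_translate = gr.Interface(
    fn=speech_to_speech_translation,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Audio(label="Generated Speech", type="numpy"),
)

# Assumed: tab the two interfaces together inside the Blocks and launch.
with demo:
    gr.TabbedInterface([mic_translate, file_translate], ["Microphone", "Audio File"])

demo.launch()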
|