speech-to-speech-translation

Sleeping

64FC committed on Aug 29, 2023

Commit

602984b

•

1 Parent(s): 7beb980

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,6 +11,10 @@ pipe = pipeline("automatic-speech-recognition",
                 device=device
 )
 # Define a function to translate an audio, in French here
 def translate(audio):
@@ -19,11 +23,6 @@ def translate(audio):
     return outputs["text"]
-# Load the model checkpoint and tokenizer
-model = VitsModel.from_pretrained("Matthijs/mms-tts-fra")
-tokenizer = VitsTokenizer.from_pretrained("Matthijs/mms-tts-fra")
 # Define function to generate the waveform output
 def synthesise(text):
     inputs = tokenizer(text, return_tensors="pt")
@@ -35,17 +34,12 @@ def synthesise(text):
     return outputs.audio[0]
-# Define global variables
-target_dtype = np.int16 # format expected by Gradio
-max_range = np.iinfo(target_dtype).max
 # Define the pipeline
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
     synthesised_speech = (
-        synthesised_speech.numpy() * max_range).astype(target_dtype)
     return 16000, synthesised_speech

                 device=device
 )
+# Load the model checkpoint and tokenizer
+model = VitsModel.from_pretrained("Matthijs/mms-tts-fra")
+tokenizer = VitsTokenizer.from_pretrained("Matthijs/mms-tts-fra")
 # Define a function to translate an audio, in French here
 def translate(audio):
     return outputs["text"]
 # Define function to generate the waveform output
 def synthesise(text):
     inputs = tokenizer(text, return_tensors="pt")
     return outputs.audio[0]
 # Define the pipeline
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
     synthesised_speech = (
+        synthesised_speech.numpy() * 32767).astype(np.int16)
     return 16000, synthesised_speech