64FC committed on
Commit
602984b
1 Parent(s): 7beb980

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -11
app.py CHANGED
@@ -11,6 +11,10 @@ pipe = pipeline("automatic-speech-recognition",
11
  device=device
12
  )
13
 
 
 
 
 
14
 
15
  # Define a function to translate an audio, in French here
16
  def translate(audio):
@@ -19,11 +23,6 @@ def translate(audio):
19
  return outputs["text"]
20
 
21
 
22
- # Load the model checkpoint and tokenizer
23
- model = VitsModel.from_pretrained("Matthijs/mms-tts-fra")
24
- tokenizer = VitsTokenizer.from_pretrained("Matthijs/mms-tts-fra")
25
-
26
-
27
  # Define function to generate the waveform output
28
  def synthesise(text):
29
  inputs = tokenizer(text, return_tensors="pt")
@@ -35,17 +34,12 @@ def synthesise(text):
35
  return outputs.audio[0]
36
 
37
 
38
- # Define global variables
39
- target_dtype = np.int16 # format expected by Gradio
40
- max_range = np.iinfo(target_dtype).max
41
-
42
-
43
  # Define the pipeline
44
  def speech_to_speech_translation(audio):
45
  translated_text = translate(audio)
46
  synthesised_speech = synthesise(translated_text)
47
  synthesised_speech = (
48
- synthesised_speech.numpy() * max_range).astype(target_dtype)
49
  return 16000, synthesised_speech
50
 
51
 
 
11
  device=device
12
  )
13
 
14
+ # Load the model checkpoint and tokenizer
15
+ model = VitsModel.from_pretrained("Matthijs/mms-tts-fra")
16
+ tokenizer = VitsTokenizer.from_pretrained("Matthijs/mms-tts-fra")
17
+
18
 
19
  # Define a function to translate an audio, in French here
20
  def translate(audio):
 
23
  return outputs["text"]
24
 
25
 
 
 
 
 
 
26
  # Define function to generate the waveform output
27
  def synthesise(text):
28
  inputs = tokenizer(text, return_tensors="pt")
 
34
  return outputs.audio[0]
35
 
36
 
 
 
 
 
 
37
  # Define the pipeline
38
  def speech_to_speech_translation(audio):
39
  translated_text = translate(audio)
40
  synthesised_speech = synthesise(translated_text)
41
  synthesised_speech = (
42
+ synthesised_speech.numpy() * 32767).astype(np.int16)
43
  return 16000, synthesised_speech
44
 
45