Baghdad99 committed on
Commit
72632b9
1 Parent(s): a48f8e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -23
app.py CHANGED
@@ -11,6 +11,7 @@ pipe = pipeline(
11
  translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-to-english-text")
12
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
13
 
 
14
  def translate_speech(audio):
15
  # Separate the sample rate and the audio data
16
  sample_rate, audio_data = audio
@@ -19,35 +20,24 @@ def translate_speech(audio):
19
  output = pipe(audio_data)
20
  print(f"Output: {output}") # Print the output to see what it contains
21
 
22
- # Check if the output contains 'transcription'
23
- if 'transcription' in output:
24
- transcription = output["transcription"]
25
  else:
26
- print("The output does not contain 'transcription'")
27
  return
28
 
29
- # Rest of your code...
 
30
 
31
- # # Define the function to translate speech
32
- # def translate_speech(audio):
33
- # # Separate the sample rate and the audio data
34
- # sample_rate, audio_data = audio
35
 
36
- # # Use the speech recognition pipeline to transcribe the audio
37
- # transcription = pipe(audio_data)["transcription"]
38
-
39
- # # Use the translation pipeline to translate the transcription
40
- # translated_text = translator(transcription, return_tensors="pt", padding=True)
41
-
42
- # # Use the text-to-speech pipeline to synthesize the translated text
43
- # synthesised_speech = tts(translated_text, return_tensors='pt')
44
-
45
- # # Define the max_range variable
46
- # max_range = 32767 # You can adjust this value based on your requirements
47
- # synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
48
-
49
- # return 16000, synthesised_speech
50
 
 
51
 
52
  # Define the Gradio interface
53
  iface = gr.Interface(
 
11
  translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-to-english-text")
12
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
13
 
14
+ # Define the function to translate speech
15
  def translate_speech(audio):
16
  # Separate the sample rate and the audio data
17
  sample_rate, audio_data = audio
 
20
  output = pipe(audio_data)
21
  print(f"Output: {output}") # Print the output to see what it contains
22
 
23
+ # Check if the output contains 'text'
24
+ if 'text' in output:
25
+ transcription = output["text"]
26
  else:
27
+ print("The output does not contain 'text'")
28
  return
29
 
30
+ # Use the translation pipeline to translate the transcription
31
+ translated_text = translator(transcription, return_tensors="pt", padding=True)
32
 
33
+ # Use the text-to-speech pipeline to synthesize the translated text
34
+ synthesised_speech = tts(translated_text, return_tensors='pt')
 
 
35
 
36
+ # Define the max_range variable
37
+ max_range = 32767 # You can adjust this value based on your requirements
38
+ synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ return 16000, synthesised_speech
41
 
42
  # Define the Gradio interface
43
  iface = gr.Interface(