Baghdad99 committed on
Commit
a48f8e0
1 Parent(s): 425531b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -10
app.py CHANGED
@@ -11,25 +11,42 @@ pipe = pipeline(
11
  translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-to-english-text")
12
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
13
 
14
- # Define the function to translate speech
15
  def translate_speech(audio):
16
  # Separate the sample rate and the audio data
17
  sample_rate, audio_data = audio
18
 
19
  # Use the speech recognition pipeline to transcribe the audio
20
- transcription = pipe(audio_data)["transcription"]
 
21
 
22
- # Use the translation pipeline to translate the transcription
23
- translated_text = translator(transcription, return_tensors="pt", padding=True)
 
 
 
 
24
 
25
- # Use the text-to-speech pipeline to synthesize the translated text
26
- synthesised_speech = tts(translated_text, return_tensors='pt')
27
 
28
- # Define the max_range variable
29
- max_range = 32767 # You can adjust this value based on your requirements
30
- synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
 
31
 
32
- return 16000, synthesised_speech
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
 
35
  # Define the Gradio interface
 
11
  translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-to-english-text")
12
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
13
 
 
14
  def translate_speech(audio):
15
  # Separate the sample rate and the audio data
16
  sample_rate, audio_data = audio
17
 
18
  # Use the speech recognition pipeline to transcribe the audio
19
+ output = pipe(audio_data)
20
+ print(f"Output: {output}") # Print the output to see what it contains
21
 
22
+ # Check if the output contains 'transcription'
23
+ if 'transcription' in output:
24
+ transcription = output["transcription"]
25
+ else:
26
+ print("The output does not contain 'transcription'")
27
+ return
28
 
29
+ # Rest of your code...
 
30
 
31
+ # # Define the function to translate speech
32
+ # def translate_speech(audio):
33
+ # # Separate the sample rate and the audio data
34
+ # sample_rate, audio_data = audio
35
 
36
+ # # Use the speech recognition pipeline to transcribe the audio
37
+ # transcription = pipe(audio_data)["transcription"]
38
+
39
+ # # Use the translation pipeline to translate the transcription
40
+ # translated_text = translator(transcription, return_tensors="pt", padding=True)
41
+
42
+ # # Use the text-to-speech pipeline to synthesize the translated text
43
+ # synthesised_speech = tts(translated_text, return_tensors='pt')
44
+
45
+ # # Define the max_range variable
46
+ # max_range = 32767 # You can adjust this value based on your requirements
47
+ # synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
48
+
49
+ # return 16000, synthesised_speech
50
 
51
 
52
  # Define the Gradio interface