Naksh786 committed on
Commit
d195c23
1 Parent(s): 34ab256

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -24
app.py CHANGED
@@ -1,32 +1,20 @@
1
- import gradio as gr
2
- from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor
3
  import torch
4
- import torchaudio
5
- import tempfile
6
 
7
- # Load model and processor
8
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
9
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
10
 
11
- # Load a voice embedding (necessary for the SpeechT5 model)
12
- speaker_embedding, _ = torchaudio.load("https://huggingface.co/microsoft/speecht5_tts/blob/main/speaker_embeddings/english/vctk_speaker_0.pt")
13
-
14
  def text_to_speech(text):
 
15
  inputs = processor(text, return_tensors="pt")
16
- speech = model.generate_speech(inputs["input_ids"], speaker_embedding)
17
-
18
- # Save the output to a temporary file
19
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
20
- torchaudio.save(f.name, speech, 16000)
21
- return f.name
22
-
23
- # Gradio interface
24
- interface = gr.Interface(
25
- fn=text_to_speech,
26
- inputs="text",
27
- outputs="audio",
28
- title="Text to Speech",
29
- description="Convert text to speech using the microsoft/speecht5_tts model"
30
- )
31
 
32
- interface.launch()
 
 
 
"""Text-to-speech helper built on the ``microsoft/speecht5_tts`` checkpoint."""

from typing import Optional

import soundfile as sf
import torch
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)

# Sample rate (Hz) used when writing the generated waveform to disk.
SAMPLE_RATE = 16000

# Size of the speaker x-vector used in the Hugging Face SpeechT5 examples.
SPEAKER_EMBEDDING_DIM = 512

# Initialize the processor, acoustic model and vocoder from Hugging Face.
# The vocoder is required to turn the model's mel spectrogram into audio.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")


def text_to_speech(
    text: str,
    speaker_embeddings: Optional[torch.Tensor] = None,
    output_path: str = "output.wav",
) -> str:
    """Synthesize *text* to a WAV file and return the file's path.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        speaker_embeddings: Optional speaker x-vector of shape
            ``(1, SPEAKER_EMBEDDING_DIM)`` selecting the voice. When omitted,
            an all-zero placeholder is used so the call still succeeds; pass
            a real x-vector for a natural-sounding voice.
        output_path: Where to write the WAV file. Defaults to
            ``"output.wav"``, matching the previous hard-coded behavior.

    Returns:
        The path the audio was written to (``output_path``).

    Raises:
        ValueError: If *text* is empty or only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    # Process the input text into tokens.
    inputs = processor(text=text, return_tensors="pt")

    if speaker_embeddings is None:
        # SpeechT5 cannot generate speech without a speaker embedding; fall
        # back to the neutral placeholder shown in the Transformers docs.
        speaker_embeddings = torch.zeros((1, SPEAKER_EMBEDDING_DIM))

    # Generate speech. Passing the vocoder makes generate_speech return a
    # waveform; without it the result is a mel spectrogram, which is not
    # playable audio.
    with torch.no_grad():
        speech = model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings,
            vocoder=vocoder,
        )

    # Save the generated speech as a WAV file.
    waveform = speech.squeeze().cpu().numpy()
    sf.write(output_path, waveform, SAMPLE_RATE)

    return output_path