File size: 676 Bytes
df165b9
d195c23
 
df165b9
d195c23
03088e5
 
 
df165b9
d195c23
03088e5
d195c23
 
 
 
 
 
 
df165b9
d195c23
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import torch
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
import soundfile as sf

# Load the pretrained SpeechT5 text-to-speech components once, at import time.
# The processor and the model are fetched from the same checkpoint identifier.
_TTS_CHECKPOINT = "microsoft/speecht5_tts"
processor = SpeechT5Processor.from_pretrained(_TTS_CHECKPOINT)
model = SpeechT5ForTextToSpeech.from_pretrained(_TTS_CHECKPOINT)

def _default_vocoder():
    """Return the shared HiFi-GAN vocoder, loading it on first use."""
    vocoder = getattr(_default_vocoder, "_instance", None)
    if vocoder is None:
        # Imported locally so this helper works with the file's existing
        # top-level imports unchanged.
        from transformers import SpeechT5HifiGan

        vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
        # Cache on the function object so the checkpoint is loaded only once.
        _default_vocoder._instance = vocoder
    return vocoder


def text_to_speech(text, speaker_embeddings=None, vocoder=None,
                   output_path="output.wav"):
    """Synthesize ``text`` with SpeechT5 and write the result to a WAV file.

    Args:
        text: The text to convert to speech.
        speaker_embeddings: Optional speaker x-vector tensor passed to
            ``model.generate_speech``. When omitted, an all-zero tensor of
            shape ``(1, model.config.speaker_embedding_dim)`` is used as a
            placeholder, as in the transformers documentation example; pass
            a real x-vector for a natural-sounding voice.
        vocoder: Optional vocoder module that turns the predicted mel
            spectrogram into a waveform. Defaults to the
            ``microsoft/speecht5_hifigan`` checkpoint, loaded on first use.
        output_path: Destination of the WAV file. Defaults to
            ``"output.wav"``.

    Returns:
        ``output_path``, i.e. ``"output.wav"`` unless overridden.
    """
    # SpeechT5Processor selects its tokenizer from the ``text=`` keyword, so
    # the text must be passed by name rather than positionally.
    inputs = processor(text=text, return_tensors="pt")
    input_ids = inputs["input_ids"]

    if speaker_embeddings is None:
        speaker_embeddings = torch.zeros(
            (1, model.config.speaker_embedding_dim), device=input_ids.device
        )
    if vocoder is None:
        vocoder = _default_vocoder()

    # Without a vocoder, generate_speech returns a mel spectrogram rather
    # than audio samples, which must not be written to a WAV file directly.
    with torch.no_grad():
        speech = model.generate_speech(
            input_ids, speaker_embeddings, vocoder=vocoder
        )

    # 16 kHz is the sample rate used with this vocoder in the SpeechT5
    # documentation example.
    sf.write(output_path, speech.squeeze().cpu().numpy(), 16000)

    return output_path