# tts-bark-test / app.py
# (Hugging Face Space by gauri-sharan — commit 6b6d9ba)
import tempfile

import gradio as gr
import scipy.io.wavfile as wavfile
import spaces
import torch
from transformers import AutoProcessor, AutoModel
def load_model():
    """Fetch the Bark-small checkpoint from the Hugging Face Hub.

    Returns:
        tuple: ``(processor, model)`` where the model has been switched
        to evaluation mode (inference only, dropout disabled).
    """
    bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
    bark_model = AutoModel.from_pretrained("suno/bark-small")
    bark_model.eval()  # inference-only: freeze train-time behavior
    return bark_processor, bark_model
# Load models once at import time so every request reuses the same weights.
print("Loading models...")
# Module-level globals read by text_to_speech() below.
processor, model = load_model()
print("Models loaded successfully!")
@spaces.GPU  # Request a GPU for the duration of each call (HF Spaces ZeroGPU)
def text_to_speech(text):
    """Synthesize *text* with Bark and return the path to a WAV file.

    Args:
        text: Input sentence to synthesize.

    Returns:
        str: Path to a uniquely named temporary ``.wav`` file containing
        the generated audio.

    Raises:
        gr.Error: If generation fails; Gradio shows the message in the UI.
            (The previous version returned the error message as a string,
            which gr.Audio would have misinterpreted as a file path.)
    """
    try:
        # Run on GPU when available, otherwise fall back to CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

        inputs = processor(
            text=[text],
            return_tensors="pt",
        ).to(device)  # inputs must live on the same device as the model

        # Inference only: disable autograd bookkeeping.
        with torch.no_grad():
            speech_values = model.generate(**inputs, do_sample=True)

        # Bring the waveform back to CPU as a 1-D numpy array for saving.
        audio_data = speech_values.cpu().numpy().squeeze()
        sampling_rate = model.generation_config.sample_rate

        # Unique file per request: the old fixed "temp_audio.wav" name was
        # clobbered when two requests ran concurrently.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            temp_path = tmp.name
        wavfile.write(temp_path, sampling_rate, audio_data)
        return temp_path
    except Exception as e:
        raise gr.Error(f"Error generating speech: {str(e)}") from e
# Gradio UI: one text box in, one audio clip out.
_text_input = gr.Textbox(
    label="Enter text",
    placeholder="दिल्ली मेट्रो में आपका स्वागत है",
)
_audio_output = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    fn=text_to_speech,
    inputs=[_text_input],
    outputs=_audio_output,
    title="Bark TTS Test App",
    description="This app generates speech from text using the Bark TTS model.",
    examples=[
        ["दिल्ली मेट्रो में आपका स्वागत है"],
        ["अगला स्टेशन राजीव चौक है"],
    ],
    theme="default",
)
if __name__ == "__main__":
    # Launch the Gradio server only when run as a script, not on import.
    demo.launch()