# Spaces: Running
import gradio as gr | |
from outetts.v0_1.interface import InterfaceHF | |
import torch | |
# Model identifier handed to InterfaceHF below.
_MODEL_ID = "OuteAI/OuteTTS-0.1-350M"

# Build the TTS interface once at import time; generate_tts reuses this
# module-level object on every call.
interface = InterfaceHF(_MODEL_ID)

# True when torch reports no CUDA device. generate_tts only enforces its
# input-length cap when this flag is set.
is_cpu = not torch.cuda.is_available()
def generate_tts(text, temperature=0.1, repetition_penalty=1.1, max_length=4096):
    """Synthesize speech for ``text`` and return the path of the saved audio.

    Args:
        text: Text to convert to speech.
        temperature: Forwarded unchanged to ``interface.generate``.
        repetition_penalty: Forwarded unchanged to ``interface.generate``.
        max_length: Forwarded to ``interface.generate`` (see note at the call).

    Returns:
        The string ``"output.wav"``: the file the generated audio was saved to.

    Raises:
        gr.Error: If ``text`` is missing or contains only whitespace, or if
            no CUDA device is available (``is_cpu``) and ``text`` is longer
            than the character limit.
    """
    # Reject missing/blank input before touching the model: there is nothing
    # to speak, and ``len(None)`` below would otherwise raise a TypeError.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Character cap, enforced only when running without CUDA.
    max_characters = 30  # adjust as needed
    if is_cpu and len(text) > max_characters:
        raise gr.Error(
            f"Text input is too long! Please limit to {max_characters} characters.\n"
            " This limit is in place to prevent long processing times as this interface is running on a free CPU tier."
        )

    # Log user input and parameters in the terminal.
    print(f"User entered text: {text}")
    print(f"Temperature set to: {temperature}")
    print(f"Repetition Penalty set to: {repetition_penalty}")
    print(f"Max Length set to: {max_length}")

    # NOTE(review): the keyword really is spelled ``max_lenght`` here.
    # Presumably this matches the parameter name of outetts v0.1's
    # ``generate`` -- confirm against the installed library before renaming.
    output = interface.generate(
        text=text,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_lenght=max_length,
    )

    # Persist the audio and hand the path back to the caller.
    output.save("output.wav")
    print("Audio generated and saved as output.wav")
    return "output.wav"
# Assemble the Gradio UI. Widgets are created top-to-bottom in display order,
# and each input widget gets a change hook that echoes its value to the
# terminal.
with gr.Blocks() as demo:

    # --- Text box -----------------------------------------------------------
    def on_text_input(text):
        print(f"User typed text: {text}")

    text_input = gr.Textbox(
        lines=2,
        placeholder="Enter text to convert to speech",
        label="Text",
    )
    text_input.change(fn=on_text_input, inputs=text_input)

    # --- Temperature slider -------------------------------------------------
    def on_temperature_change(val):
        print(f"Temperature slider adjusted to: {val}")

    temperature_slider = gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.1,
        label="Temperature",
    )
    temperature_slider.change(fn=on_temperature_change, inputs=temperature_slider)

    # --- Repetition-penalty slider ------------------------------------------
    def on_repetition_penalty_change(val):
        print(f"Repetition Penalty slider adjusted to: {val}")

    repetition_penalty_slider = gr.Slider(
        minimum=1.0,
        maximum=2.0,
        value=1.1,
        label="Repetition Penalty",
    )
    repetition_penalty_slider.change(
        fn=on_repetition_penalty_change,
        inputs=repetition_penalty_slider,
    )

    # --- Max-length slider --------------------------------------------------
    def on_max_length_change(val):
        print(f"Max Length slider adjusted to: {val}")

    max_length_slider = gr.Slider(
        minimum=512,
        maximum=4096,
        value=4096,
        step=256,
        label="Max Length",
    )
    max_length_slider.change(fn=on_max_length_change, inputs=max_length_slider)

    # --- Trigger button and audio player ------------------------------------
    generate_button = gr.Button(value="Generate Speech")
    audio_output = gr.Audio(type="filepath", label="Generated Speech")

    # Clicking the button runs generate_tts on the current widget values and
    # feeds the returned file path to the audio player.
    generate_button.click(
        fn=generate_tts,
        inputs=[
            text_input,
            temperature_slider,
            repetition_penalty_slider,
            max_length_slider,
        ],
        outputs=audio_output,
    )

# Announce and start the app.
print("Launching Gradio interface...")
demo.launch()