Spaces:

drewThomasson
/

OuteTTS-DEMO

Running

File size: 3,482 Bytes

e44094d
 
66ccdd7
a2dc963
d084eaa
 
e44094d
66ccdd7
 
 
d084eaa
 
66ccdd7
 
 
3271f83
66ccdd7
 
3271f83
66ccdd7
 
f5506fc
 
 
 
 
 
 
d084eaa
 
 
 
 
4e4528b
d084eaa
 
f5506fc
 
d084eaa
 
d7b5eaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3271f83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7b5eaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5506fc
66ccdd7

import gradio as gr
from outetts.v0_1.interface import InterfaceHF
import torch

# Initialize the TTS model interface
interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")

# Check if running on a CPU
is_cpu = not torch.cuda.is_available()

# Define a function to generate and save TTS output from input text
def generate_tts(text, temperature=0.1, repetition_penalty=1.1, max_length=4096):
    # Set a character limit for the text input
    max_characters = 30  # adjust as needed
    
    # Check if input text exceeds character limit when on CPU
    if is_cpu and len(text) > max_characters:
        raise gr.Error(
            f"Text input is too long! Please limit to {max_characters} characters.\nThis limit is in place to prevent long processing times as this interface is running on a free CPU tier."
        )

    # Log user input and parameters in the terminal
    print(f"User entered text: {text}")
    print(f"Temperature set to: {temperature}")
    print(f"Repetition Penalty set to: {repetition_penalty}")
    print(f"Max Length set to: {max_length}")
    
    # Generate TTS output
    output = interface.generate(
        text=text,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_lenght=max_length
    )
    # Save the output audio to a file
    output.save("output.wav")
    print("Audio generated and saved as output.wav")
    
    return "output.wav"

# Create the Gradio Blocks interface
with gr.Blocks() as demo:
    # Log each interaction
    def on_text_input(text):
        print(f"User typed text: {text}")

    def on_temperature_change(val):
        print(f"Temperature slider adjusted to: {val}")

    def on_repetition_penalty_change(val):
        print(f"Repetition Penalty slider adjusted to: {val}")

    def on_max_length_change(val):
        print(f"Max Length slider adjusted to: {val}")
    
    # Dynamically set max_chars for text input based on whether it's CPU or GPU
    if is_cpu:
        text_input = gr.Textbox(
            lines=2,
            placeholder="Enter text to convert to speech (30 character limit on CPU)",
            label="Text",
            max_length=30  # Enforce character limit only on CPU
        )
    else:
        text_input = gr.Textbox(
            lines=2,
            placeholder="Enter text to convert to speech",
            label="Text"
        )
    
    # Track changes for debugging
    text_input.change(on_text_input, inputs=text_input)
    
    # Sliders with change events for tracking
    temperature_slider = gr.Slider(0.1, 1.0, value=0.1, label="Temperature")
    temperature_slider.change(on_temperature_change, inputs=temperature_slider)

    repetition_penalty_slider = gr.Slider(1.0, 2.0, value=1.1, label="Repetition Penalty")
    repetition_penalty_slider.change(on_repetition_penalty_change, inputs=repetition_penalty_slider)

    max_length_slider = gr.Slider(512, 4096, value=4096, step=256, label="Max Length")
    max_length_slider.change(on_max_length_change, inputs=max_length_slider)

    # Button to generate TTS and Audio output
    generate_button = gr.Button("Generate Speech")
    audio_output = gr.Audio(type="filepath", label="Generated Speech")

    # Define interaction between input and output
    generate_button.click(
        generate_tts, 
        inputs=[text_input, temperature_slider, repetition_penalty_slider, max_length_slider], 
        outputs=audio_output
    )

print("Launching Gradio interface...")
demo.launch()