File size: 3,482 Bytes
e44094d
 
66ccdd7
a2dc963
d084eaa
 
e44094d
66ccdd7
 
 
d084eaa
 
66ccdd7
 
 
3271f83
66ccdd7
 
3271f83
66ccdd7
 
f5506fc
 
 
 
 
 
 
d084eaa
 
 
 
 
4e4528b
d084eaa
 
f5506fc
 
d084eaa
 
d7b5eaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3271f83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7b5eaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5506fc
66ccdd7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import tempfile

import gradio as gr
import torch
from outetts.v0_1.interface import InterfaceHF

# Initialize the TTS model interface.
# Built once at module import so every call to generate_tts reuses the same
# instance instead of constructing a new one per request.
interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")

# Check if running on a CPU: the app treats itself as CPU-only whenever torch
# reports no available CUDA device. The flag is used further down to cap the
# input length (both in generate_tts and in the Textbox configuration).
is_cpu = not torch.cuda.is_available()

# Define a function to generate and save TTS output from input text
def generate_tts(
    text: str,
    temperature: float = 0.1,
    repetition_penalty: float = 1.1,
    max_length: int = 4096,
) -> str:
    """Generate speech for ``text`` and return the path of the saved WAV file.

    Args:
        text: Text to convert to speech.
        temperature: Value forwarded to ``interface.generate`` as ``temperature``.
        repetition_penalty: Value forwarded to ``interface.generate`` as
            ``repetition_penalty``.
        max_length: Value forwarded to ``interface.generate`` as its length cap.

    Returns:
        Path of a freshly created ``.wav`` file holding the generated audio.
        Each call writes to its own file, so overlapping requests cannot
        overwrite one another's output.

    Raises:
        gr.Error: If ``text`` is empty or whitespace-only, or if it exceeds the
            character limit while the app is running without CUDA.
    """
    # Set a character limit for the text input
    max_characters = 30  # adjust as needed

    # Nothing to synthesize: fail with a readable message instead of handing
    # an empty prompt to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Check if input text exceeds character limit when on CPU
    if is_cpu and len(text) > max_characters:
        raise gr.Error(
            f"Text input is too long! Please limit to {max_characters} characters.\nThis limit is in place to prevent long processing times as this interface is running on a free CPU tier."
        )

    # Log user input and parameters in the terminal
    print(f"User entered text: {text}")
    print(f"Temperature set to: {temperature}")
    print(f"Repetition Penalty set to: {repetition_penalty}")
    print(f"Max Length set to: {max_length}")

    # Generate TTS output.
    # NOTE(review): `max_lenght` (sic) is kept on purpose -- it appears to be
    # the keyword spelling the OuteTTS v0.1 interface expects. Confirm against
    # the installed outetts version before "correcting" it.
    output = interface.generate(
        text=text,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_lenght=max_length
    )

    # Reserve a unique file name per request. A fixed "output.wav" would be
    # shared by all concurrent users, letting one request clobber (or serve)
    # another request's audio. The handle is closed before saving so the
    # path can be reopened for writing on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        output_path = tmp_file.name

    # Save the output audio to the reserved file
    output.save(output_path)
    print(f"Audio generated and saved as {output_path}")

    return output_path

# Create the Gradio Blocks interface
with gr.Blocks() as demo:
    # ---- Debug callbacks -------------------------------------------------
    # Each callback only echoes the component's new value to the terminal;
    # none of them returns anything or updates the UI.
    def on_text_input(text):
        """Echo the current textbox content to the terminal."""
        print(f"User typed text: {text}")

    def on_temperature_change(val):
        """Echo the new temperature slider value to the terminal."""
        print(f"Temperature slider adjusted to: {val}")

    def on_repetition_penalty_change(val):
        """Echo the new repetition-penalty slider value to the terminal."""
        print(f"Repetition Penalty slider adjusted to: {val}")

    def on_max_length_change(val):
        """Echo the new max-length slider value to the terminal."""
        print(f"Max Length slider adjusted to: {val}")

    # ---- Components (created in display order) ---------------------------
    # Start from the GPU configuration and tighten it when no CUDA device is
    # available: the CPU variant advertises and enforces a 30 character cap.
    _textbox_options = {
        "lines": 2,
        "placeholder": "Enter text to convert to speech",
        "label": "Text",
    }
    if is_cpu:
        _textbox_options["placeholder"] = (
            "Enter text to convert to speech (30 character limit on CPU)"
        )
        _textbox_options["max_length"] = 30
    text_input = gr.Textbox(**_textbox_options)

    temperature_slider = gr.Slider(
        minimum=0.1, maximum=1.0, value=0.1, label="Temperature"
    )
    repetition_penalty_slider = gr.Slider(
        minimum=1.0, maximum=2.0, value=1.1, label="Repetition Penalty"
    )
    max_length_slider = gr.Slider(
        minimum=512, maximum=4096, value=4096, step=256, label="Max Length"
    )

    # Trigger button and the player that receives the generated file path
    generate_button = gr.Button("Generate Speech")
    audio_output = gr.Audio(type="filepath", label="Generated Speech")

    # ---- Event wiring ----------------------------------------------------
    # Attach every debug callback to the change event of its own component.
    for _component, _logger in (
        (text_input, on_text_input),
        (temperature_slider, on_temperature_change),
        (repetition_penalty_slider, on_repetition_penalty_change),
        (max_length_slider, on_max_length_change),
    ):
        _component.change(_logger, inputs=_component)

    # Clicking the button runs generate_tts with the four input values (in
    # the order of its parameters) and routes the result to the audio player.
    generate_button.click(
        fn=generate_tts,
        inputs=[
            text_input,
            temperature_slider,
            repetition_penalty_slider,
            max_length_slider,
        ],
        outputs=audio_output,
    )

# These statements run unconditionally at module level (there is no
# `if __name__ == "__main__"` guard), so importing this file also launches the app.
print("Launching Gradio interface...")
# Launch the Blocks app built above, using launch()'s default arguments.
demo.launch()