drewThomasson committed on
Commit
4e4528b
•
1 Parent(s): 6ca77a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -47
app.py CHANGED
@@ -1,68 +1,124 @@
1
  import gradio as gr
2
  from outetts.v0_1.interface import InterfaceHF
 
3
 
4
- # Initialize the OuteTTS interface
5
- interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
 
6
 
7
- def generate_tts(text, temperature, repetition_penalty, max_length):
8
- # Logging information to the terminal
9
- print("Generating TTS with the following parameters:")
10
- print(f"Text: {text}")
11
- print(f"Temperature: {temperature}")
12
- print(f"Repetition Penalty: {repetition_penalty}")
13
- print(f"Max Length: {max_length}")
 
14
 
15
- output = interface.generate(
16
- text=text,
17
- temperature=temperature,
18
- repetition_penalty=repetition_penalty,
19
- max_lenght=max_length
20
- )
21
- print("TTS generation complete. Output ready.")
22
- return output # Gradio will handle the audio directly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Gradio Blocks API for structured UI
25
  with gr.Blocks() as demo:
26
- gr.Markdown("# OuteTTS - Text to Speech Interface")
27
- gr.Markdown("Generate speech from text using the OuteTTS model.")
 
 
 
 
 
 
 
 
 
28
 
29
  with gr.Row():
30
  text_input = gr.Textbox(
31
- label="Text Input",
32
- placeholder="Enter the text for TTS generation",
33
  lines=3
34
  )
35
 
36
- temperature = gr.Slider(
37
- minimum=0.1,
38
- maximum=1.0,
39
- value=0.1,
40
- step=0.01,
41
- label="Temperature"
42
- )
43
- repetition_penalty = gr.Slider(
44
- minimum=0.5,
45
- maximum=2.0,
46
- value=1.1,
47
- step=0.1,
48
- label="Repetition Penalty"
49
- )
50
- max_length = gr.Slider(
51
- minimum=256,
52
- maximum=4096,
53
- value=1024,
54
- step=256,
55
- label="Max Length"
 
 
 
 
 
 
 
 
56
  )
57
 
58
- output_audio = gr.Audio(label="Generated Speech", type="auto")
59
-
60
- generate_button = gr.Button("Generate Speech")
61
  generate_button.click(
62
  fn=generate_tts,
63
  inputs=[text_input, temperature, repetition_penalty, max_length],
64
  outputs=output_audio
65
  )
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- # Launch the Gradio demo
68
- demo.launch()
 
 
1
  import gradio as gr
2
  from outetts.v0_1.interface import InterfaceHF
3
+ import logging
4
 
5
+ # Configure logging to display information in the terminal
6
+ logging.basicConfig(level=logging.INFO)
7
+ logger = logging.getLogger(__name__)
8
 
9
+ # Initialize the OuteTTS interface with the Hugging Face model
10
+ try:
11
+ logger.info("Initializing OuteTTS InterfaceHF with model 'OuteAI/OuteTTS-0.1-350M'")
12
+ interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
13
+ logger.info("Model loaded successfully.")
14
+ except Exception as e:
15
+ logger.error(f"Failed to load model: {e}")
16
+ raise e
17
 
18
+ def generate_tts(text, temperature, repetition_penalty, max_length):
19
+ """
20
+ Generates speech from the input text using the OuteTTS model.
21
+
22
+ Parameters:
23
+ text (str): The input text for TTS.
24
+ temperature (float): Sampling temperature.
25
+ repetition_penalty (float): Repetition penalty.
26
+ max_length (int): Maximum length of the generated audio tokens.
27
+
28
+ Returns:
29
+ str: Path to the generated audio file.
30
+ """
31
+ logger.info("Received TTS generation request.")
32
+ logger.info(f"Parameters - Text: {text}, Temperature: {temperature}, Repetition Penalty: {repetition_penalty}, Max Length: {max_length}")
33
+
34
+ try:
35
+ output = interface.generate(
36
+ text=text,
37
+ temperature=temperature,
38
+ repetition_penalty=repetition_penalty,
39
+ max_length=max_length # Corrected spelling from 'max_lenght' to 'max_length'
40
+ )
41
+ logger.info("TTS generation complete.")
42
+
43
+ # Save the output to a temporary WAV file
44
+ output_path = "output.wav"
45
+ output.save(output_path)
46
+ logger.info(f"Audio saved to {output_path}")
47
+
48
+ return output_path # Gradio will handle the audio playback
49
+ except Exception as e:
50
+ logger.error(f"Error during TTS generation: {e}")
51
+ return None
52
 
53
+ # Define the Gradio Blocks interface
54
  with gr.Blocks() as demo:
55
+ gr.Markdown("# 🎤 OuteTTS - Text to Speech Interface")
56
+ gr.Markdown(
57
+ """
58
+ Generate speech from text using the **OuteTTS-0.1-350M** model.
59
+
60
+ **Key Features:**
61
+ - Pure language modeling approach to TTS
62
+ - Voice cloning capabilities
63
+ - Compatible with LLaMa architecture
64
+ """
65
+ )
66
 
67
  with gr.Row():
68
  text_input = gr.Textbox(
69
+ label="📄 Text Input",
70
+ placeholder="Enter the text for TTS generation",
71
  lines=3
72
  )
73
 
74
+ with gr.Row():
75
+ temperature = gr.Slider(
76
+ minimum=0.1,
77
+ maximum=1.0,
78
+ value=0.1,
79
+ step=0.01,
80
+ label="🌡️ Temperature"
81
+ )
82
+ repetition_penalty = gr.Slider(
83
+ minimum=0.5,
84
+ maximum=2.0,
85
+ value=1.1,
86
+ step=0.1,
87
+ label="🔁 Repetition Penalty"
88
+ )
89
+ max_length = gr.Slider(
90
+ minimum=256,
91
+ maximum=4096,
92
+ value=1024,
93
+ step=256,
94
+ label="📏 Max Length"
95
+ )
96
+
97
+ generate_button = gr.Button("🔊 Generate Speech")
98
+
99
+ output_audio = gr.Audio(
100
+ label="🎧 Generated Speech",
101
+ type="filepath" # Expecting a file path to the audio
102
  )
103
 
104
+ # Define the button click event
 
 
105
  generate_button.click(
106
  fn=generate_tts,
107
  inputs=[text_input, temperature, repetition_penalty, max_length],
108
  outputs=output_audio
109
  )
110
+
111
+ gr.Markdown(
112
+ """
113
+ ---
114
+ **Technical Blog:** [OuteTTS-0.1-350M](https://www.outeai.com/blog/OuteTTS-0.1-350M)
115
+
116
+ **Credits:**
117
+ - [WavTokenizer](https://github.com/jishengpeng/WavTokenizer)
118
+ - [CTC Forced Alignment](https://pytorch.org/audio/stable/tutorials/ctc_forced_alignment_api_tutorial.html)
119
+ """
120
+ )
121
 
122
+ # Launch the Gradio app
123
+ if __name__ == "__main__":
124
+ demo.launch()