Spaces:

drewThomasson
/

OuteTTS-DEMO

Running

App Files Files Community

drewThomasson committed 29 days ago

Commit

4e4528b

•

1 Parent(s): 6ca77a8

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -47

app.py CHANGED Viewed

@@ -1,68 +1,124 @@
 import gradio as gr
 from outetts.v0_1.interface import InterfaceHF
-# Initialize the OuteTTS interface
-interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
-def generate_tts(text, temperature, repetition_penalty, max_length):
-    # Logging information to the terminal
-    print("Generating TTS with the following parameters:")
-    print(f"Text: {text}")
-    print(f"Temperature: {temperature}")
-    print(f"Repetition Penalty: {repetition_penalty}")
-    print(f"Max Length: {max_length}")
-    output = interface.generate(
-        text=text,
-        temperature=temperature,
-        repetition_penalty=repetition_penalty,
-        max_lenght=max_length
-    )
-    print("TTS generation complete. Output ready.")
-    return output  # Gradio will handle the audio directly
-# Gradio Blocks API for structured UI
 with gr.Blocks() as demo:
-    gr.Markdown("# OuteTTS - Text to Speech Interface")
-    gr.Markdown("Generate speech from text using the OuteTTS model.")
     with gr.Row():
         text_input = gr.Textbox(
-            label="Text Input",
-            placeholder="Enter the text for TTS generation",
             lines=3
         )
-    temperature = gr.Slider(
-        minimum=0.1,
-        maximum=1.0,
-        value=0.1,
-        step=0.01,
-        label="Temperature"
-    )
-    repetition_penalty = gr.Slider(
-        minimum=0.5,
-        maximum=2.0,
-        value=1.1,
-        step=0.1,
-        label="Repetition Penalty"
-    )
-    max_length = gr.Slider(
-        minimum=256,
-        maximum=4096,
-        value=1024,
-        step=256,
-        label="Max Length"
     )
-    output_audio = gr.Audio(label="Generated Speech", type="auto")
-    generate_button = gr.Button("Generate Speech")
     generate_button.click(
         fn=generate_tts,
         inputs=[text_input, temperature, repetition_penalty, max_length],
         outputs=output_audio
     )
-# Launch the Gradio demo
-demo.launch()

 import gradio as gr
 from outetts.v0_1.interface import InterfaceHF
+import logging
+# Configure logging to display information in the terminal
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize the OuteTTS interface with the Hugging Face model
+try:
+    logger.info("Initializing OuteTTS InterfaceHF with model 'OuteAI/OuteTTS-0.1-350M'")
+    interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
+    logger.info("Model loaded successfully.")
+except Exception as e:
+    logger.error(f"Failed to load model: {e}")
+    raise e
+def generate_tts(text, temperature, repetition_penalty, max_length):
+    """
+    Generates speech from the input text using the OuteTTS model.
+    Parameters:
+        text (str): The input text for TTS.
+        temperature (float): Sampling temperature.
+        repetition_penalty (float): Repetition penalty.
+        max_length (int): Maximum length of the generated audio tokens.
+    Returns:
+        str: Path to the generated audio file.
+    """
+    logger.info("Received TTS generation request.")
+    logger.info(f"Parameters - Text: {text}, Temperature: {temperature}, Repetition Penalty: {repetition_penalty}, Max Length: {max_length}")
+    try:
+        output = interface.generate(
+            text=text,
+            temperature=temperature,
+            repetition_penalty=repetition_penalty,
+            max_length=max_length  # Corrected spelling from 'max_lenght' to 'max_length'
+        )
+        logger.info("TTS generation complete.")
+        # Save the output to a temporary WAV file
+        output_path = "output.wav"
+        output.save(output_path)
+        logger.info(f"Audio saved to {output_path}")
+        return output_path  # Gradio will handle the audio playback
+    except Exception as e:
+        logger.error(f"Error during TTS generation: {e}")
+        return None
+# Define the Gradio Blocks interface
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎤 OuteTTS - Text to Speech Interface")
+    gr.Markdown(
+        """
+        Generate speech from text using the **OuteTTS-0.1-350M** model.
+        **Key Features:**
+        - Pure language modeling approach to TTS
+        - Voice cloning capabilities
+        - Compatible with LLaMa architecture
+        """
+    )
     with gr.Row():
         text_input = gr.Textbox(
+            label="📄 Text Input",
+            placeholder="Enter the text for TTS generation",
             lines=3
         )
+    with gr.Row():
+        temperature = gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.1,
+            step=0.01,
+            label="🌡️ Temperature"
+        )
+        repetition_penalty = gr.Slider(
+            minimum=0.5,
+            maximum=2.0,
+            value=1.1,
+            step=0.1,
+            label="🔁 Repetition Penalty"
+        )
+        max_length = gr.Slider(
+            minimum=256,
+            maximum=4096,
+            value=1024,
+            step=256,
+            label="📏 Max Length"
+        )
+    generate_button = gr.Button("🔊 Generate Speech")
+    output_audio = gr.Audio(
+        label="🎧 Generated Speech",
+        type="filepath"  # Expecting a file path to the audio
     )
+    # Define the button click event
     generate_button.click(
         fn=generate_tts,
         inputs=[text_input, temperature, repetition_penalty, max_length],
         outputs=output_audio
     )
+    gr.Markdown(
+        """
+        ---
+        **Technical Blog:** [OuteTTS-0.1-350M](https://www.outeai.com/blog/OuteTTS-0.1-350M)
+        **Credits:**
+        - [WavTokenizer](https://github.com/jishengpeng/WavTokenizer)
+        - [CTC Forced Alignment](https://pytorch.org/audio/stable/tutorials/ctc_forced_alignment_api_tutorial.html)
+        """
+    )
+# Launch the Gradio app
+if __name__ == "__main__":
+    demo.launch()