Spaces:

deekshachilukuri
/

Voice.clone

Running

File size: 1,477 Bytes

03d611c
248e28b
812f25f
d344bc4
3226f5e
ccae267
d53d512
ccae267
3226f5e
182c411
 
 
d344bc4
182c411
 
 
 
d344bc4
fa0a6d8
d344bc4
6a6f2e1
 
 
 
 
 
 
 
 
 
 
 
 
 
d344bc4
6a6f2e1
d344bc4
4b21b18

import spaces
import torch
import gradio as gr
from TTS.api import TTS
import os
from unittest.mock import patch
os.environ["COQUI_TOS_AGREED"] = "1"


def always_yes(*args, **kwargs):
    """Stand-in for ``input`` that answers every prompt with 'y'.

    Any positional or keyword arguments are accepted and ignored.
    """
    answer = 'y'
    return answer

# Pick the first CUDA device when one is visible, otherwise run on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Build the XTTS v2 model once at import time. While it is being created and
# moved to the device, `input` is replaced by `always_yes`, so any interactive
# prompt raised during loading is answered with 'y'.
with patch('builtins.input', new=always_yes):
    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        progress_bar=False,
    )
    tts = tts.to(device)

# NOTE(review): `enable_queue=True` is kept unchanged; newer `spaces` releases
# may ignore this argument — confirm against the installed version.
@spaces.GPU(enable_queue=True)
def generate_voice(text, audio_file_path):
    """Synthesize ``text`` in the voice of a reference recording.

    Args:
        text: The text to speak. Must contain at least one non-blank character.
        audio_file_path: Filesystem path of the reference speaker audio
            (passed to the model as ``speaker_wav``).

    Returns:
        Path of the generated WAV file. Each call writes to its own uniquely
        named file under ``/tmp/`` so that concurrent requests cannot
        overwrite one another's output.

    Raises:
        gr.Error: If ``text`` is blank or no reference audio was provided.
    """
    import tempfile  # local import: only this function needs it

    # Fail early with a readable message instead of an opaque model error.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not audio_file_path:
        raise gr.Error("Please provide a reference audio clip to clone.")

    output_dir = "/tmp/"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Reserve a unique file name per request; a single shared
    # "cloned_audio.wav" would be clobbered by simultaneous users.
    fd, output_path = tempfile.mkstemp(
        prefix="cloned_audio_", suffix=".wav", dir=output_dir
    )
    os.close(fd)  # the TTS call reopens the path itself

    try:
        tts.tts_to_file(
            text,
            speaker_wav=audio_file_path,  # reference clip, given as a path
            language="en",  # output language is fixed to English
            file_path=output_path,
            split_sentences=True,
        )
    except Exception:
        # Do not leave an empty placeholder behind when synthesis fails.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise
    return output_path

    
import gradio as gr

# Assemble the web UI: a text box plus a reference audio clip go in, and the
# synthesized audio comes out. Components are created in the same order as
# they are handed to the interface.
reference_inputs = [
    gr.Textbox(label="Input Text"),
    gr.Audio(label="Input Audio", type="filepath"),
]
cloned_output = gr.Audio(label="Cloned Voice")

iface = gr.Interface(
    generate_voice,
    reference_inputs,
    cloned_output,
    title="Voice Cloning TTS",
)

# Start serving the app.
iface.launch()