File size: 1,477 Bytes
03d611c
248e28b
812f25f
d344bc4
3226f5e
ccae267
d53d512
ccae267
3226f5e
182c411
 
 
d344bc4
182c411
 
 
 
d344bc4
fa0a6d8
d344bc4
6a6f2e1
 
 
 
 
 
 
 
 
 
 
 
 
 
d344bc4
6a6f2e1
d344bc4
4b21b18
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import spaces
import torch
import gradio as gr
from TTS.api import TTS
import os
from unittest.mock import patch
# Set before the model is loaded further down. Presumably read by Coqui TTS to
# treat its terms of service as accepted without an interactive prompt
# -- TODO confirm against the TTS library.
os.environ["COQUI_TOS_AGREED"] = "1"


# Drop-in replacement for input(): accepts anything, always answers 'y'
def always_yes(*args, **kwargs):
    """Ignore every positional and keyword argument and return ``'y'``."""
    answer = 'y'
    return answer

# While the model is being loaded, builtins.input is swapped for always_yes so
# that any interactive confirmation is answered 'y' automatically (presumably
# the Coqui licence prompt -- TODO confirm).
with patch('builtins.input', new=always_yes):
    # Use the first CUDA device when torch reports one, otherwise the CPU.
    if torch.cuda.is_available():
        device = "cuda:0"
    else:
        device = "cpu"

    # Build the XTTS v2 model, then move it to the selected device.
    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        progress_bar=False,
    )
    tts = tts.to(device)

@spaces.GPU(enable_queue=True)
def generate_voice(text, audio_file_path):
    """Synthesize ``text`` in the voice of a reference recording.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        audio_file_path: Filesystem path of the reference audio clip that is
            passed to the model as ``speaker_wav``.

    Returns:
        Path of the generated WAV file, created inside ``/tmp/``.

    Raises:
        gr.Error: If ``text`` is empty/blank or no reference audio was given.
    """
    import tempfile  # local import so this function does not rely on a new file-level import

    # Fail early with a readable message instead of an opaque error from the
    # TTS library when one of the inputs is missing.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not audio_file_path:
        raise gr.Error("Please provide a reference audio clip to clone.")

    output_dir = "/tmp/"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    # Give every request its own output file. A single fixed file name would
    # let overlapping requests overwrite each other's result. Note that these
    # files are not deleted here.
    fd, output_path = tempfile.mkstemp(
        prefix="cloned_audio_", suffix=".wav", dir=output_dir
    )
    os.close(fd)  # only the reserved path is needed; tts_to_file writes the audio

    tts.tts_to_file(
        text,
        speaker_wav=audio_file_path,  # reference clip, passed as a file path string
        language="en",  # output language is fixed to English
        file_path=output_path,
        split_sentences=True,
    )
    return output_path

    
import gradio as gr
# Assemble the Gradio UI: a text box and a reference clip go in, audio comes out
text_input = gr.Textbox(label="Input Text")
reference_audio = gr.Audio(label="Input Audio", type="filepath")
cloned_output = gr.Audio(label="Cloned Voice")

iface = gr.Interface(
    fn=generate_voice,
    inputs=[text_input, reference_audio],
    outputs=cloned_output,
    title="Voice Cloning TTS",
)

# Start serving the interface
iface.launch()