"""Gradio demo: generate short audio clips from Spanish text prompts.

The Spanish prompt is machine-translated to English (googletrans) and fed
to the AudioLDM text-to-audio diffusion pipeline, returning a 5-second clip.
"""
from diffusers import AudioLDMPipeline
import torch
import gradio as gr
from googletrans import Translator

# Prefer GPU with fp16 for speed/memory; fall back to CPU with fp32
# (fp16 on CPU is unsupported/slow for most ops).
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32
print(device)

repo_id = "cvssp/audioldm-m-full"
pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)
# pipe.unet = torch.compile(pipe.unet)

# Create the translator client once at import time instead of per request.
_translator = Translator()


def translate_text(text):
    """Translate *text* from Spanish (``es``) to English (``en``).

    Returns the translated string. Requires network access (googletrans
    calls the Google Translate web endpoint).
    """
    return _translator.translate(text, src="es", dest="en").text


def generate_sound(text):
    """Generate a short audio clip from a Spanish text prompt.

    The prompt is translated to English, then passed to the AudioLDM
    pipeline (25 inference steps, 5 seconds of audio, with a fixed
    negative prompt to discourage low-quality output).

    Returns:
        (sample_rate, waveform) tuple consumable by ``gr.Audio``.
    """
    print(text)
    text = translate_text(text)
    print(text)
    waveforms = pipe(
        text,
        num_inference_steps=25,
        audio_length_in_s=5,
        negative_prompt="low quality, average quality",
    ).audios
    # NOTE(review): assumes the pipeline emits 16 kHz audio — confirm
    # against the model's vocoder config if the checkpoint changes.
    rate = 16000
    return rate, waveforms[0]


demo = gr.Blocks()
with demo:
    with gr.Row():
        with gr.Column():
            # BUG FIX: the original passed this string as ``value=``,
            # pre-filling the textbox with the instruction text instead
            # of labelling the field.
            text = gr.Textbox(label="Ingrese el texto:")
            button = gr.Button(value="Generar")
        with gr.Column():
            output = gr.Audio()
    button.click(generate_sound, text, output)

demo.launch()