Spaces:

mrolando
/

text_to_sound

Runtime error

App Files Files Community

matias committed on Sep 1, 2023

Commit

48443f7

•

1 Parent(s): cd8ff52

fixed space

Browse files

Files changed (2) hide show

Iso_Logotipo_Ceibal.png +0 -0
app.py +46 -13

Iso_Logotipo_Ceibal.png ADDED Viewed

app.py CHANGED Viewed

@@ -20,17 +20,26 @@ pipe = pipe.to(device)
 # pipe.unet = torch.compile(pipe.unet)
 #pipe.unet = torch.compile(pipe.unet)
-def generate_sound(text):
     print(text)
     # text=translate_text(text)
     text = translate_text(text)
     #translator = Translator()
     #text=translator.translate(text, src='es',dest="en").text
     print(text)
     waveforms = pipe(text,
-                     num_inference_steps=25,
-                     audio_length_in_s=5,
-                     negative_prompt = "low quality, average quality").audios
     rate =16000
     return rate, waveforms[0]
     #return gr.make_waveform((rate, waveforms[0]))
@@ -42,18 +51,42 @@ def translate_text(text):
     text = es_en_translator(text)[0].get("translation_text")
     return text
-demo = gr.Blocks()
-with demo:
     with gr.Row():
         with gr.Column():
-            text = gr.Textbox(value="Ingrese el texto:")
-            button = gr.Button(value="Generar")
         with gr.Column():
-            output = gr.Audio()
-            #output = gr.Video(label="Output")
-        button.click(generate_sound,text,output)
 demo.launch()

 # pipe.unet = torch.compile(pipe.unet)
 #pipe.unet = torch.compile(pipe.unet)
+import base64
+# Load the logo file once at start-up and keep its contents as base64 text
+# in `encoded_image`.
+with open("Iso_Logotipo_Ceibal.png", "rb") as image_file:
+    raw_logo = image_file.read()
+encoded_image = base64.b64encode(raw_logo).decode()
+def generate_sound(text,steps,audio_length,negative_prompt):
+    """Generate an audio clip from a text description.
+
+    Args:
+        text: Description of the sound. It is passed through translate_text
+            before being handed to the pipeline.
+        steps: Number of inference steps (forwarded as num_inference_steps).
+        audio_length: Clip duration in seconds (forwarded as
+            audio_length_in_s).
+        negative_prompt: Text the model should steer away from. May be empty
+            or None, in which case no negative prompt is used.
+
+    Returns:
+        A (sample_rate, waveform) tuple: the fixed rate of 16000 and the
+        first waveform produced by the pipeline.
+    """
     print(text)
     # text=translate_text(text)
     text = translate_text(text)
+    # The negative prompt is optional. Only translate it when it contains
+    # real text; otherwise pass None so the pipeline is not conditioned on
+    # whatever the translator returns for an empty string.
+    if negative_prompt and negative_prompt.strip():
+        negative_prompt = translate_text(negative_prompt)
+    else:
+        negative_prompt = None
     #translator = Translator()
     #text=translator.translate(text, src='es',dest="en").text
     print(text)
     waveforms = pipe(text,
+                     # UI numeric inputs may arrive as floats; the step
+                     # count has to be an integer.
+                     num_inference_steps=int(steps),
+                     audio_length_in_s=audio_length,
+                     negative_prompt = negative_prompt).audios
     rate =16000
     return rate, waveforms[0]
     #return gr.make_waveform((rate, waveforms[0]))
     text = es_en_translator(text)[0].get("translation_text")
     return text
+# Build the page: a header with logo and instructions, the prompt and the
+# advanced options in the left column, and the generated audio on the right.
+# The logo is inlined as a data URI; the embedded file is a PNG, so the URI
+# declares image/png.
+with gr.Blocks() as demo:
+    gr.Markdown("""
+    <center>
+    <h1>
+    Uso de AI para la generación de sonidos a partir de texto.
+    </h1>
+    <img src='data:image/png;base64,{}' width=200px>
+    <h3>
+    Con este espacio podrás generar sonidos a partir de texto, intentá ser lo más descriptivo/a posible en el texto. Se puede usar directamente o podés cambiar ajustes, que impacto tiene cada uno está detallado en su descripción. Cambiá valores y mirá los resultados!
+    </h3>
+    <h4>El texto se traduce del español al inglés para alimentar al modelo, también se puede escribir el texto de entrada en inglés.</h4>
+    </center>
+    """.format(encoded_image))
     with gr.Row():
         with gr.Column():
+            gr.Markdown("Primero debes ingresar el texto para generar el sonido:")
+            with gr.Row():
+                with gr.Column(scale=4):
+                    # Wide column for the prompt; the label names the sound, which is what this app generates.
+                    prompt = gr.Textbox(label="Texto base para generar el sonido")
+                with gr.Column(scale=1, min_width=50):
+                    btn = gr.Button("Generar") # Submit button next to the prompt
+            with gr.Row():
+                with gr.Accordion("Opciones avanzadas", open=False): # Advanced options start collapsed
+                        negative_prompt = gr.Textbox(label="Texto negativo para la generación", info='Al ingresar texto en este campo el modelo intentará alejarse lo mas posible del mismo, este puede ser "baja calidad"')
+                        with gr.Row():
+                            with gr.Column():
+                                audio_len = gr.Slider(label="Duración del sonido", minimum=1, maximum=30, value=5, step = 1,
+                                info="Cuánto mayor sonido, mayor será el tiempo de procesamiento.")
+                                steps = gr.Slider(label="Pasos de Inferencia", minimum=1, maximum=100, value=20,step =1 ,
+                                info="Al aumentar los pasos de inferencia se puede acercar más a la descripción del texto pero con un mayor tiempo de procesamiento.")
+                                examples = gr.Examples(inputs=[prompt,negative_prompt],examples=[["Un martillo golpeando madera","low quality"]])
         with gr.Column():
+            output = gr.Audio(label="Resultado") # Result player in the right-hand column
+    # The input order must match generate_sound(text, steps, audio_length, negative_prompt).
+    btn.click(fn=generate_sound, inputs=[prompt,steps,audio_len,negative_prompt], outputs=[output])
+gr.close_all()
 demo.launch()