midi-audioldm

Runtime error

App Files Community

lauraibnz committed on Jun 12, 2023

Commit

91aeeb9

•

1 Parent(s): 2342079

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -10

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ pipe = pipe.to(device)
 generator = torch.Generator(device)
-def predict(midi_file=None, prompt="", negative_prompt="", audio_length_in_s=5, random_seed=0, controlnet_conditioning_scale=1, num_inference_steps=20, guess_mode=False):
     if isinstance(midi_file, _TemporaryFileWrapper):
         midi_file = midi_file.name
     midi = PrettyMIDI(midi_file)
@@ -34,6 +34,7 @@ def predict(midi_file=None, prompt="", negative_prompt="", audio_length_in_s=5,
         controlnet_conditioning_scale=float(controlnet_conditioning_scale),
         guess_mode=guess_mode,
         generator=generator.manual_seed(int(random_seed)),
     )
     return (16000, audio.audios.T)
@@ -48,19 +49,20 @@ with gr.Blocks(title="🎹 MIDI-AudioLDM", theme=gr.themes.Base(text_size=gr.the
             """)
     with gr.Row():
         with gr.Column(variant='panel'):
-            midi = gr.File(label="midi file", file_types=[".mid"], info="Load the MIDI file that you want to use as conditioning.")
-            prompt = gr.Textbox(label="prompt", info="Enter a descriptive text prompt.")
         with gr.Column(variant='panel'):
             audio = gr.Audio(label="audio")
     with gr.Accordion("Advanced Settings", open=False):
-        neg_prompt = gr.Textbox(label="negative prompt", info="Enter a negative prompt not to guide the audio generation.")
         duration = gr.Slider(0, 30, value=5, step=5, label="duration (seconds)", info="Modify the duration of the output audio file.")
-        seed = gr.Number(value=42, label="seed", info="Change the random seed for a different generation result.")
-        cond = gr.Slider(0.0, 1.0, value=1.0, step=0.1, label="conditioning scale", info="Enter a value between 0 and 1. The larger the more it will take the conditioning into account.")
-        inf = gr.Slider(0, 50, value=20, step=0.1, label="inference steps", info="Edit the number of denoising steps. More inference steps usually leads to better but slower results.")
-        guess = gr.Checkbox(label="guess mode", info="If true, the model will try to recognize the content of the conditioning without the need of a text prompt.")
     btn = gr.Button("Generate")
-    btn.click(predict, inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guess], outputs=[audio])
-    gr.Examples(examples=[["S00.mid", "piano", "", 10, 25, 1.0, 20, False], ["S00.mid", "violin", "", 10, 25, 1.0, 20, False], ["S00.mid", "woman singing", "", 10, 25, 0.8, 20, False]], inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guess], fn=predict, outputs=audio, cache_examples=True)
 demo.launch()

 generator = torch.Generator(device)
+def predict(midi_file=None, prompt="", negative_prompt="", audio_length_in_s=5, random_seed=0, controlnet_conditioning_scale=1, num_inference_steps=20, guidance_scale=2.5, guess_mode=False):
     if isinstance(midi_file, _TemporaryFileWrapper):
         midi_file = midi_file.name
     midi = PrettyMIDI(midi_file)
         controlnet_conditioning_scale=float(controlnet_conditioning_scale),
         guess_mode=guess_mode,
         generator=generator.manual_seed(int(random_seed)),
+        guidance_scale=float(guidance_scale),
     )
     return (16000, audio.audios.T)
             """)
     with gr.Row():
         with gr.Column(variant='panel'):
+            midi = gr.File(label="midi", file_types=[".mid"])
+            prompt = gr.Textbox(label="prompt", info="Enter a descriptive text prompt to guide the audio generation.")
         with gr.Column(variant='panel'):
             audio = gr.Audio(label="audio")
     with gr.Accordion("Advanced Settings", open=False):
         duration = gr.Slider(0, 30, value=5, step=5, label="duration (seconds)", info="Modify the duration of the output audio file.")
+        inf = gr.Slider(0, 50, value=20, step=0.1, label="inference steps", info="Edit the number of denoising steps. More inference steps usually leads to higher quality but slower results.")
+        guidance_scale = gr.Slider(0, 4, value=2.5, step=0.5, label="guidance scale", info="Modify the guidance scale. The higher the value the more linked is the generated audio to the text prompt, sometimes at the expense of lower quality.")
+        neg_prompt = gr.Textbox(label="negative prompt", info="Optionally enter a negative text prompt not to guide the audio generation.")
+        seed = gr.Number(value=42, label="random seed", info="Change the random seed for a different generation result.")
+        cond = gr.Slider(0.0, 1.0, value=1.0, step=0.1, label="conditioning scale", info="Enter a value between 0 and 1. The larger the more it will take the conditioning into account. Lower values are recommended for more creative prompts.")
+        guess = gr.Checkbox(label="guess mode", info="If selected, the model will try to recognize the content of the MIDI without the need of a text prompt.")
     btn = gr.Button("Generate")
+    btn.click(predict, inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guidance_scale, guess], outputs=[audio])
+    # gr.Examples(examples=[["S00.mid", "piano", "", 10, 25, 1.0, 20, 2.5, False], ["S00.mid", "violin", "", 10, 25, 1.0, 20, 2.5, False], ["S00.mid", "woman singing", "", 10, 25, 0.8, 20, 2.5, False]], inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guidance_scale, guess], fn=predict, outputs=audio, cache_examples=True)
 demo.launch()