File size: 3,232 Bytes
6b28a91 c1af806 6b28a91 a9c020a 91f1a24 6b28a91 ca1a401 6b28a91 9eb21f5 6b28a91 3aaf62a 6b28a91 902a8d1 c298807 6b28a91 39a196c 72c85ef 39a196c 72c85ef 39a196c 72c85ef 8bbfb83 72c85ef 39a196c 72c85ef 0511686 cad8f1b 39a196c 72c85ef 39a196c 6b28a91 97a0727 3aaf62a c298807 0cb7b61 0511686 902a8d1 d973a6f 6b28a91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
import os
import shutil
import spaces
import sys
# we will clone the repo and install the dependencies
# NOTE: Still fixing bugs, not release, do not try :) !
# os.system('pip install -r qa_mdt/requirements.txt')
# os.system('pip install xformers==0.0.26.post1')
# os.system('pip install torchlibrosa==0.0.9 librosa==0.9.2')
# os.system('pip install -q pytorch_lightning==2.1.3 torchlibrosa==0.0.9 librosa==0.9.2 ftfy==6.1.1 braceexpand')
# os.system('pip install torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121')
# only then import the necessary modules from qa_mdt
from qa_mdt.pipeline import MOSDiffusionPipeline
# Instantiate the pipeline once at import time so model weights are loaded
# before the first request (each call to generate_waveform reuses this object).
pipe = MOSDiffusionPipeline()
# this runs the pipeline with user input and saves the output as 'awesome.wav'
@spaces.GPU(duration=120)
def generate_waveform(description):
    """Generate music from a text description and render a waveform video.

    Args:
        description: Free-text prompt describing the desired music.

    Returns:
        A ``(waveform_video, audio_path)`` pair matching the two Interface
        outputs (Video, Audio), or ``(None, None)`` when the pipeline did
        not produce an output file.
    """
    # Prefixing "high quality" steers the model toward its high-quality bucket.
    high_quality_description = "high quality " + description
    pipe(high_quality_description)

    # NOTE(review): the pipeline appears to write its output to this fixed
    # path — confirm against MOSDiffusionPipeline before changing it.
    generated_file_path = "./awesome.wav"

    if not os.path.exists(generated_file_path):
        # Bug fix: the Interface declares TWO outputs (Video, Audio), so a
        # single error string would break Gradio's output mapping. Return a
        # None pair instead; the UI shows empty components on failure.
        return None, None

    waveform_video = gr.make_waveform(
        audio=generated_file_path,
        fg_alpha=0.7,
        bg_color="#09090a",
        bars_color="#00FF00",
        bar_count=100,
        bar_width=0.4,
        animate=True,
    )
    return waveform_video, generated_file_path
# Markdown rendered above the app via the Interface's `description` field.
# (Runtime string: content intentionally left byte-identical.)
intro = """
# ๐ถ OpenMusic: Diffusion That Plays Music ๐ง ๐น
Welcome to **OpenMusic**, a next-gen diffusion model designed to generate high-quality music audio from text descriptions!
Simply enter a few words describing the vibe, and watch as the model generates a unique track for your input.
Powered by the QA-MDT model, based on the new research paper linked below.
- [GitHub Repo](https://github.com/ivcylc/qa-mdt) by [@changli](https://github.com/ivcylc) ๐.
- [Paper](https://arxiv.org/pdf/2405.15863) & [Paper Demo](https://qa-mdt.github.io/ )
- [HuggingFace](https://huggingface.co/jadechoghari/qa_mdt) [@jadechoghari](https://github.com/jadechoghari) ๐ค.
Note: The music generation process will take 1-2 minutes ๐ถ
---
"""
# Gradio wiring: one text prompt in, a waveform video plus the audio file out.
prompt_input = gr.Textbox(lines=2, placeholder="Enter a music description here...")

video_output = gr.Video(label="Watch the Waveform ๐ผ")
audio_output = gr.Audio(label="Download the Music ๐ถ")

# Clickable example prompts shown below the input box.
example_prompts = [
    ["๐น A modern synthesizer creating futuristic soundscapes."],
    ["๐ธ Acoustic ballad with heartfelt lyrics and soft piano."],
    ["๐ A deep bassline mixed with upbeat electronic synths, creating a club anthem."],
    ["๐ป Melodic orchestral composition with a build-up of strings and percussion, evoking cinematic tension."],
    ["๐ Sad song of two lovers who never talk again, starting intensely with emotions and then gradually fading down into silence."],
]

iface = gr.Interface(
    fn=generate_waveform,
    inputs=prompt_input,
    outputs=[video_output, audio_output],
    description=intro,
    examples=example_prompts,
    # "lazy" caches each example result the first time a user runs it.
    cache_examples="lazy",
)
# Launch the Gradio app
# Guarded so importing this module (e.g. by the Spaces runner) does not
# immediately start a server.
if __name__ == "__main__":
    iface.launch()
|