import gradio as gr
import os
import spaces

# clone the repo and install the dependencies
# NOTE: still fixing bugs, not yet released, do not try :) !
# os.system('pip install -r qa_mdt/requirements.txt')
# os.system('pip install xformers==0.0.26.post1')
# os.system('pip install torchlibrosa==0.0.9 librosa==0.9.2')
# os.system('pip install -q pytorch_lightning==2.1.3 torchlibrosa==0.0.9 librosa==0.9.2 ftfy==6.1.1 braceexpand')
# os.system('pip install torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121')

# only then import the necessary modules from qa_mdt
from qa_mdt.pipeline import MOSDiffusionPipeline


# instantiate the pipeline once at startup so the model weights are loaded
# before the first request arrives
pipe = MOSDiffusionPipeline()

# runs the pipeline on the user's prompt; the pipeline writes its output
# to './awesome.wav'
@spaces.GPU(duration=120)
def generate_waveform(description):
    # prepend "high quality" so the quality-aware model is steered toward
    # its high-quality generation mode
    high_quality_description = "high quality " + description
    pipe(high_quality_description)

    generated_file_path = "./awesome.wav"

    if not os.path.exists(generated_file_path):
        # raise instead of returning a bare string, which would not match
        # the (video, audio) output signature declared on the interface
        raise gr.Error("Failed to generate the waveform.")

    # render an animated bar visualization of the generated audio
    waveform_video = gr.make_waveform(
        audio=generated_file_path,
        fg_alpha=0.7,
        bg_color="#09090a",
        bars_color="#00FF00",
        bar_count=100,
        bar_width=0.4,
        animate=True,
    )
    return waveform_video, generated_file_path
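# optional local smoke test, assuming the qa_mdt checkpoints are available and
# a GPU is attached (the prompt below is just an illustrative example);
# uncomment to verify generation without launching the UI:
# video_path, audio_path = generate_waveform("lofi hip hop beat with soft piano")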


intro = """
# 🎶 OpenMusic: Diffusion That Plays Music 🎧 🎹

Welcome to **OpenMusic**, a next-gen diffusion model that generates high-quality music audio from text descriptions!

Simply enter a few words describing the vibe, and the model will generate a unique track for your input.

Powered by the QA-MDT model introduced in the research paper linked below.

- [GitHub Repo](https://github.com/ivcylc/qa-mdt) by [@changli](https://github.com/ivcylc) 🎓
- [Paper](https://arxiv.org/pdf/2405.15863) & [Paper Demo](https://qa-mdt.github.io/)
- [HuggingFace](https://huggingface.co/jadechoghari/qa_mdt) by [@jadechoghari](https://github.com/jadechoghari) 🤗

Note: music generation takes 1-2 minutes 🎶

---
"""

# gradio interface: a two-line prompt box in, a waveform video plus
# downloadable audio out
iface = gr.Interface(
    fn=generate_waveform,
    inputs=gr.Textbox(lines=2, placeholder="Enter a music description here..."),
    outputs=[gr.Video(label="Watch the Waveform 🎼"), gr.Audio(label="Download the Music 🎶")],
    description=intro,
    examples=[
        ["🎹 A modern synthesizer creating futuristic soundscapes."],
        ["🎸 Acoustic ballad with heartfelt lyrics and soft piano."],
        ["🔊 A deep bassline mixed with upbeat electronic synths, creating a club anthem."],
        ["🎻 Melodic orchestral composition with a build-up of strings and percussion, evoking cinematic tension."],
        ["💔 Sad song about two lovers who never speak again, opening with intense emotion and gradually fading into silence."]
    ],
    # "lazy" caches each example output the first time a user runs it
    cache_examples="lazy",
)
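# optional: cap the request queue so long generations don't pile up; queuing is
# enabled by default in recent Gradio versions, and max_size=10 is illustrative
# iface.queue(max_size=10)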

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()