Bango lingo committed on
Commit
3d41f57
β€’
1 Parent(s): 23a53d8

Upload 2 files

Browse files
Files changed (2) hide show
  1. gradio_app.py +131 -0
  2. requirements.txt +9 -0
gradio_app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from einops import rearrange
4
+ from stable_audio_tools import get_pretrained_model
5
+ from stable_audio_tools.inference.generation import generate_diffusion_cond
6
+ from pydub import AudioSegment
7
+ import re
8
+ import os
9
+ from datetime import datetime
10
+ import gradio as gr
11
+
12
# The pretrained model is cached at module level so repeated requests do not
# re-download the checkpoint and re-transfer it to the device on every call.
_MODEL_CACHE = {}


def _get_model(device):
    """Load (once) and return the cached (model, model_config) pair on *device*."""
    if "model" not in _MODEL_CACHE:
        model, model_config = get_pretrained_model("audo/stable-audio-open-1.0")
        _MODEL_CACHE["model"] = model.to(device)
        _MODEL_CACHE["config"] = model_config
    return _MODEL_CACHE["model"], _MODEL_CACHE["config"]


# Define the function to generate audio based on a prompt
def generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed, sampler_type):
    """Generate audio for *prompt* with Stable Audio and save it as an MP3.

    Args:
        prompt: Text description of the audio; also used to derive the filename.
        steps: Number of diffusion steps.
        cfg_scale: Classifier-free-guidance scale.
        sigma_min: Lower noise-schedule bound for the sampler.
        sigma_max: Upper noise-schedule bound for the sampler.
        generation_time: Clip length in seconds (timing conditioning).
        seed: RNG seed passed to the sampler.
        sampler_type: Name of the diffusion sampler to use.

    Returns:
        Full path of the exported MP3 under ``Output/<YYYY-MM-DD>/``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model, model_config = _get_model(device)
    sample_rate = model_config["sample_rate"]
    sample_size = model_config["sample_size"]

    # Set up text and timing conditioning
    conditioning = [{
        "prompt": prompt,
        "seconds_start": 0,
        "seconds_total": generation_time
    }]

    # Generate stereo audio
    output = generate_diffusion_cond(
        model,
        steps=steps,
        cfg_scale=cfg_scale,
        conditioning=conditioning,
        sample_size=sample_size,
        sigma_min=sigma_min,
        sigma_max=sigma_max,
        sampler_type=sampler_type,
        device=device,
        seed=seed
    )

    # Rearrange audio batch to a single sequence
    output = rearrange(output, "b d n -> d (b n)")

    # Peak normalize, clip, and convert to int16. Guard against an all-zero
    # buffer: dividing by a zero peak would fill the output with NaNs.
    output = output.to(torch.float32)
    peak = torch.max(torch.abs(output))
    if peak > 0:
        output = output.div(peak)
    output = output.clamp(-1, 1).mul(32767).to(torch.int16).cpu()

    # Round-trip through a WAV file so pydub can re-encode it as MP3; remove
    # the intermediate file afterwards instead of leaving it in the CWD.
    temp_wav = "temp_output.wav"
    torchaudio.save(temp_wav, output, sample_rate)
    try:
        audio = AudioSegment.from_wav(temp_wav)
    finally:
        os.remove(temp_wav)

    # Create Output folder and dated subfolder if they do not exist
    output_folder = "Output"
    date_folder = datetime.now().strftime("%Y-%m-%d")
    save_path = os.path.join(output_folder, date_folder)
    os.makedirs(save_path, exist_ok=True)

    # Generate a filename based on the prompt
    filename = re.sub(r'\W+', '_', prompt) + ".mp3"  # Replace non-alphanumeric characters with underscores
    full_path = os.path.join(save_path, filename)

    # Ensure the filename is unique by appending a number if the file already exists
    base_filename = filename
    counter = 1
    while os.path.exists(full_path):
        filename = f"{base_filename[:-4]}_{counter}.mp3"
        full_path = os.path.join(save_path, filename)
        counter += 1

    # Export the audio to MP3 format
    audio.export(full_path, format="mp3")

    return full_path
77
def audio_generator(prompt, sampler_type, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed):
    """Gradio callback: log parameters, run generation, and return two outputs.

    Returns:
        ``(gr.Audio, str)`` on success, or ``(None, error message)`` on
        failure. The interface declares two outputs, so the error path must
        also yield two values — the original returned a bare string there,
        which broke the output mapping whenever generation failed.
    """
    try:
        print("Generating audio with parameters:")
        print("Prompt:", prompt)
        print("Sampler Type:", sampler_type)
        print("Steps:", steps)
        print("CFG Scale:", cfg_scale)
        print("Sigma Min:", sigma_min)
        print("Sigma Max:", sigma_max)
        print("Generation Time:", generation_time)
        print("Seed:", seed)

        filename = generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max,
                                  generation_time, seed, sampler_type)
        # Report the actual saved path instead of a placeholder string.
        return gr.Audio(filename), f"Generated: {filename}"
    except Exception as e:
        # None clears the audio component; the message goes to the textbox.
        return None, str(e)
+
94
# ---------------------------------------------------------------------------
# Gradio UI wiring: build the input widgets, then mount them on an Interface.
# Defaults are supplied directly to the constructors.
# ---------------------------------------------------------------------------
prompt_textbox = gr.Textbox(lines=5, label="Prompt")

sampler_dropdown = gr.Dropdown(
    label="Sampler Type",
    choices=[
        "dpmpp-3m-sde",
        "dpmpp-2m-sde",
        "k-heun",
        "k-lms",
        "k-dpmpp-2s-ancestral",
        "k-dpm-2",
        "k-dpm-fast",
    ],
    value="dpmpp-3m-sde",
)

steps_slider = gr.Slider(minimum=0, maximum=200, step=1, value=100, label="Steps")
cfg_scale_slider = gr.Slider(minimum=0, maximum=15, step=0.1, value=7, label="CFG Scale")
sigma_min_slider = gr.Slider(minimum=0, maximum=50, step=0.1, value=0.3, label="Sigma Min")
sigma_max_slider = gr.Slider(minimum=0, maximum=1000, step=1, value=500, label="Sigma Max")
generation_time_slider = gr.Slider(minimum=0, maximum=47, step=1, value=47,
                                   label="Generation Time (seconds)")
seed_slider = gr.Slider(minimum=-1, maximum=999999, step=1, value=77212, label="Seed")

output_textbox = gr.Textbox(label="Output")

title = "πŸ’€πŸ”Š StableAudioWebUI πŸ’€πŸ”Š"
description = "[Github Repository](https://github.com/Saganaki22/StableAudioWebUI)"

# Input order must match audio_generator's parameter order.
gr.Interface(
    audio_generator,
    inputs=[
        prompt_textbox,
        sampler_dropdown,
        steps_slider,
        cfg_scale_slider,
        sigma_min_slider,
        sigma_max_slider,
        generation_time_slider,
        seed_slider,
    ],
    outputs=[gr.Audio(), output_textbox],
    title=title,
    description=description,
).launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ torchaudio
4
+ gradio
5
+ einops
6
+ stable_audio_tools
7
+ pydub
8
+
9
+