Spaces:
Running
Running
Add streaming audio loading and writing
Browse files- app.py +35 -28
- requirements.txt +1 -1
app.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
"""Gradio demo for denoisers."""
|
|
|
|
|
|
|
2 |
import gradio as gr
|
3 |
import numpy as np
|
4 |
import torch
|
@@ -13,53 +16,57 @@ MODELS = [
|
|
13 |
]
|
14 |
|
15 |
|
16 |
-
def denoise(model_name,
|
17 |
"""Denoise audio."""
|
18 |
if "unet1d" in model_name:
|
19 |
model = UNet1DModel.from_pretrained(model_name)
|
20 |
else:
|
21 |
model = WaveUNetModel.from_pretrained(model_name)
|
22 |
-
sr, audio = inputs
|
23 |
-
audio = torch.from_numpy(audio)
|
24 |
-
audio = audio / 32768.0
|
25 |
|
26 |
-
if
|
27 |
-
|
28 |
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
|
36 |
|
37 |
-
|
38 |
-
|
|
|
|
|
39 |
|
40 |
-
|
|
|
41 |
|
42 |
-
|
43 |
-
|
|
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
audio_chunk = padded[:, :, i : i + chunk_size]
|
48 |
-
with torch.no_grad():
|
49 |
-
clean_chunk = model(audio_chunk).audio
|
50 |
-
clean.append(clean_chunk.squeeze(0))
|
51 |
|
52 |
-
|
53 |
-
|
|
|
54 |
|
55 |
-
|
|
|
|
|
56 |
|
57 |
-
return
|
58 |
|
59 |
|
60 |
iface = gr.Interface(
|
61 |
fn=denoise,
|
62 |
-
inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), "
|
63 |
-
outputs="
|
64 |
)
|
65 |
iface.launch()
|
|
|
1 |
"""Gradio demo for denoisers."""
|
2 |
+
import tempfile
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
import gradio as gr
|
6 |
import numpy as np
|
7 |
import torch
|
|
|
16 |
]
|
17 |
|
18 |
|
19 |
+
def denoise(model_name: str, audio_path: str):
    """Denoise an audio file chunk-by-chunk with a pretrained model.

    Args:
        model_name: Hugging Face model id; ids containing "unet1d" load a
            UNet1DModel, anything else a WaveUNetModel.
        audio_path: Path to the input audio file (any container/codec that
            torchaudio's StreamReader can decode).

    Returns:
        Path to the denoised WAV file (a unique temporary file).
    """
    if "unet1d" in model_name:
        model = UNet1DModel.from_pretrained(model_name)
    else:
        model = WaveUNetModel.from_pretrained(model_name)

    # Decide the device once instead of re-querying CUDA on every chunk.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()

    # Stream the input so arbitrarily long files never need to fit in memory.
    # Chunks arrive resampled to the model's rate and downmixed to mono.
    stream_reader = torchaudio.io.StreamReader(audio_path)
    stream_reader.add_basic_audio_stream(
        frames_per_chunk=model.config.max_length,
        sample_rate=model.config.sample_rate,
        num_channels=1,
    )

    # Write to a unique temp file: a fixed "denoised.wav" in the CWD would be
    # clobbered by concurrent requests. Gradio serves the path back to the
    # client, so the file must outlive this call (delete=False).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    stream_writer = torchaudio.io.StreamWriter(out_path)
    stream_writer.add_audio_stream(sample_rate=model.config.sample_rate, num_channels=1)

    chunk_size = model.config.max_length

    with stream_writer.open():
        for (audio_chunk,) in tqdm(stream_reader.stream()):
            if audio_chunk is None:  # end of stream
                break

            # StreamReader yields (frames, channels); the model wants
            # (channels, frames).
            audio_chunk = audio_chunk.permute(1, 0)
            original_chunk_size = audio_chunk.size(-1)

            # Zero-pad the final (short) chunk up to the model's fixed length.
            if original_chunk_size < chunk_size:
                padding = chunk_size - original_chunk_size
                audio_chunk = torch.nn.functional.pad(audio_chunk, (0, padding))

            if use_cuda:
                audio_chunk = audio_chunk.cuda()

            with torch.no_grad():
                denoised_chunk = model(audio_chunk[None]).audio
            # Trim the padding back off before writing.
            denoised_chunk = denoised_chunk[:, :, :original_chunk_size]

            stream_writer.write_audio_chunk(
                0, denoised_chunk.squeeze(0).permute(1, 0).cpu()
            )

    return out_path
|
65 |
|
66 |
|
67 |
# Gradio UI: pick a pretrained model from the dropdown and upload an audio
# file; `denoise` returns a filepath, which gr.Audio(type="filepath") plays
# back / offers for download.
iface = gr.Interface(
    fn=denoise,
    inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), gr.Audio(type="filepath")],
    outputs=gr.Audio(type="filepath"),
)
# Blocking call: starts the web server for the Space.
iface.launch()
|
requirements.txt
CHANGED
@@ -3,7 +3,7 @@ torch
|
|
3 |
torchaudio
|
4 |
pytorch-lightning
|
5 |
pedalboard
|
6 |
-
denoisers
|
7 |
transformers
|
8 |
librosa
|
9 |
wandb
|
|
|
3 |
torchaudio
|
4 |
pytorch-lightning
|
5 |
pedalboard
|
6 |
+
denoisers==0.1.7
|
7 |
transformers
|
8 |
librosa
|
9 |
wandb
|