wrice committed on
Commit f939eb0
Parent: c13028f

Add streaming audio loading and writing

Files changed (2)
  1. app.py +35 -28
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,4 +1,7 @@
 """Gradio demo for denoisers."""
+import tempfile
+from pathlib import Path
+
 import gradio as gr
 import numpy as np
 import torch
@@ -13,53 +16,57 @@ MODELS = [
 ]
 
 
-def denoise(model_name, inputs):
+def denoise(model_name: str, audio_path: str):
     """Denoise audio."""
     if "unet1d" in model_name:
         model = UNet1DModel.from_pretrained(model_name)
     else:
         model = WaveUNetModel.from_pretrained(model_name)
-    sr, audio = inputs
-    audio = torch.from_numpy(audio)
-    audio = audio / 32768.0
 
-    if audio.ndim == 1:
-        audio = audio.unsqueeze(0)
+    if torch.cuda.is_available():
+        model = model.cuda()
 
-    print(f"Audio shape: {audio.shape}")
-    print(f"Sample rate: {sr}")
+    stream_reader = torchaudio.io.StreamReader(audio_path)
+    stream_reader.add_basic_audio_stream(
+        frames_per_chunk=model.config.max_length,
+        sample_rate=model.config.sample_rate,
+        num_channels=1,
+    )
 
-    if audio.shape[0] > 1:
-        audio = audio.mean(0, keepdim=True)
+    stream_writer = torchaudio.io.StreamWriter("denoised.wav")
+    stream_writer.add_audio_stream(sample_rate=model.config.sample_rate, num_channels=1)
 
-    print(f"Audio shape: {audio.shape}")
+    chunk_size = model.config.max_length
 
-    if sr != model.config.sample_rate:
-        audio = torchaudio.functional.resample(audio, sr, model.config.sample_rate)
+    with stream_writer.open():
+        for (audio_chunk,) in tqdm(stream_reader.stream()):
+            if audio_chunk is None:
+                break
 
-    chunk_size = model.config.max_length
+            audio_chunk = audio_chunk.permute(1, 0)
+            original_chunk_size = audio_chunk.size(-1)
 
-    padding = abs(audio.size(-1) % chunk_size - chunk_size)
-    padded = torch.nn.functional.pad(audio, (0, padding))
+            if audio_chunk.size(-1) < chunk_size:
+                padding = chunk_size - audio_chunk.size(-1)
+                audio_chunk = torch.nn.functional.pad(audio_chunk, (0, padding))
 
-    clean = []
-    for i in tqdm(range(0, padded.shape[-1], chunk_size)):
-        audio_chunk = padded[:, :, i : i + chunk_size]
-        with torch.no_grad():
-            clean_chunk = model(audio_chunk).audio
-        clean.append(clean_chunk.squeeze(0))
+            if torch.cuda.is_available():
+                audio_chunk = audio_chunk.cuda()
 
-    denoised = torch.concat(clean, 1)[:, : audio.shape[-1]].clamp(-1.0, 1.0)
-    denoised = (denoised * 32767.0).numpy().astype(np.int16)
+            with torch.no_grad():
+                denoised_chunk = model(audio_chunk[None]).audio
+            denoised_chunk = denoised_chunk[:, :, :original_chunk_size]
 
-    print(f"Denoised shape: {denoised.shape}")
+            stream_writer.write_audio_chunk(
+                0, denoised_chunk.squeeze(0).permute(1, 0).cpu()
+            )
 
-    return model.config.sample_rate, denoised.transpose()
+    return "denoised.wav"
 
 
 iface = gr.Interface(
     fn=denoise,
-    inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), "audio"],
-    outputs="audio",
+    inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), gr.Audio(type="filepath")],
+    outputs=gr.Audio(type="filepath"),
 )
 iface.launch()
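The rewritten handler never materializes the whole recording: torchaudio.io.StreamReader decodes, resamples, and downmixes one model-sized chunk at a time, and torchaudio.io.StreamWriter encodes each denoised chunk as soon as it is ready, so peak memory stays near a single chunk regardless of file length. Below is a minimal standalone sketch of that read/process/write loop with the model call replaced by a pass-through; the file names and chunk parameters are placeholders, not values from this commit:

    # Standalone sketch: stream audio chunks through an identity "model".
    import torchaudio

    reader = torchaudio.io.StreamReader("input.wav")   # placeholder input path
    reader.add_basic_audio_stream(
        frames_per_chunk=48000,    # stands in for model.config.max_length
        sample_rate=16000,         # stands in for model.config.sample_rate
        num_channels=1,            # downmix to mono while decoding
    )

    writer = torchaudio.io.StreamWriter("output.wav")  # placeholder output path
    writer.add_audio_stream(sample_rate=16000, num_channels=1)

    with writer.open():
        for (chunk,) in reader.stream():
            if chunk is None:      # no more decoded audio available
                break
            # chunk has shape (frames, channels); the demo permutes it to
            # (channels, frames), pads the final short chunk, and runs the
            # model here. This sketch just writes it back unchanged.
            writer.write_audio_chunk(0, chunk)

Switching the Gradio components to gr.Audio(type="filepath") is what makes this possible: the function now receives a path on disk instead of a decoded (sample_rate, numpy_array) tuple, so decoding can be deferred to the streaming reader.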
requirements.txt CHANGED
@@ -3,7 +3,7 @@ torch
 torchaudio
 pytorch-lightning
 pedalboard
-denoisers
+denoisers==0.1.7
 transformers
 librosa
 wandb
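Pinning denoisers to an exact release keeps the demo's UNet1DModel and WaveUNetModel imports stable across dependency updates. A quick sanity check (hypothetical, not part of the repo) that the environment matches the pin:

    # Confirm the installed denoisers release matches requirements.txt.
    from importlib.metadata import version
    assert version("denoisers") == "0.1.7"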