lyimo committed on
Commit
95facbc
1 Parent(s): 518eabe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -37
app.py CHANGED
@@ -1,44 +1,130 @@
1
  import gradio as gr
2
- import torch
3
  import torchaudio
4
- from speechbrain.inference.separation import SepformerSeparation as separator
5
  import os
 
 
 
6
 
7
# Fetch (or reuse from the local cache directory) the pretrained SepFormer
# DNS4 16 kHz enhancement model once, at import time.
model = separator.from_hparams(
    savedir='pretrained_models/sepformer-dns4-16k-enhancement',
    source="speechbrain/sepformer-dns4-16k-enhancement",
)
12
-
13
- # Define the enhancement function
14
def enhance_audio(noisy_audio):
    """Denoise an uploaded recording and return the path of the enhanced WAV.

    The upload is first re-encoded as a WAV scratch file, passed through the
    module-level ``model``, and the result is written to ``enhanced.wav``.

    Args:
        noisy_audio: Path to the uploaded audio file (as supplied by the
            Gradio ``Audio`` component with ``type="filepath"``).

    Returns:
        str: Path to the enhanced audio file (``"enhanced.wav"``).
    """
    wav_audio = "temp_audio.wav"
    try:
        # Re-encode the upload (e.g. MP3) as WAV, keeping its sample rate.
        waveform, sample_rate = torchaudio.load(noisy_audio)
        torchaudio.save(wav_audio, waveform, sample_rate)

        # Load through the model's own loader and add a batch dimension.
        noisy = model.load_audio(wav_audio).unsqueeze(0)

        # lengths=[1.0] marks the single batch item as full length.
        enhanced = model.enhance_batch(noisy, lengths=torch.tensor([1.0]))

        enhanced_path = "enhanced.wav"
        torchaudio.save(enhanced_path, enhanced.cpu(), 16000)
        return enhanced_path
    finally:
        # Always remove the scratch file, including when loading, enhancing
        # or saving raised; previously it was only removed on success.
        if os.path.exists(wav_audio):
            os.remove(wav_audio)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
# Input and output widgets both exchange audio as file paths.
noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
enhanced_output = gr.Audio(type="filepath", label="Enhanced Audio")

# Wire the enhancement function into a single-page Gradio app.
interface = gr.Interface(
    fn=enhance_audio,
    inputs=noisy_input,
    outputs=enhanced_output,
    title="Speech Enhancement App",
    description="Upload a noisy audio file to enhance the quality. The enhanced audio can be downloaded after processing."
)

# Start the app and request a public share link.
interface.launch(share=True)
 
 
 
1
  import gradio as gr
 
2
  import torchaudio
3
+ import torch
4
  import os
5
+ from pydub import AudioSegment
6
+ import tempfile
7
+ from speechbrain.pretrained.separation import SepformerSeparation
8
 
9
class AudioDenoiser:
    """Speech enhancement wrapper around the SepFormer DNS4 16 kHz model.

    Uploaded audio is converted to a mono 16 kHz WAV scratch file, passed to
    the model, and the first estimated source is written to a new WAV file
    inside ``OUTPUT_DIR``.
    """

    # Directory that receives the enhanced WAV files returned to the caller.
    OUTPUT_DIR = "enhanced_audio"

    def __init__(self):
        """Load the pretrained model and make sure the output directory exists."""
        # Initialize the SepFormer model for audio enhancement
        self.model = SepformerSeparation.from_hparams(
            source="speechbrain/sepformer-dns4-16k-enhancement",
            savedir='pretrained_models/sepformer-dns4-16k-enhancement'
        )

        # Create output directory if it doesn't exist
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

    @staticmethod
    def _remove_quietly(path):
        """Delete *path*, ignoring the case where it is already gone."""
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: a missing or locked scratch file must not
            # mask the error (or the result) the caller is about to report.
            pass

    def _new_output_path(self):
        """Reserve and return a unique ``.wav`` path inside ``OUTPUT_DIR``.

        A fresh name per call keeps overlapping requests from overwriting
        each other's result. Files are not removed here, so the directory
        grows until something else prunes it.
        """
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        fd, path = tempfile.mkstemp(
            prefix="enhanced_audio_", suffix=".wav", dir=self.OUTPUT_DIR
        )
        os.close(fd)  # only the reserved name is needed; it is rewritten later
        return path

    def convert_audio_to_wav(self, input_path):
        """
        Convert any audio format to WAV with proper settings

        Args:
            input_path (str): Path to input audio file

        Returns:
            str: Path to converted WAV file (mono, 16 kHz). The caller owns
                the file and is responsible for deleting it.

        Raises:
            gr.Error: If the file cannot be decoded or exported.
        """
        temp_wav_path = None
        try:
            # Reserve a scratch path and close our own handle right away so
            # the exporter is the only writer (and no descriptor is leaked).
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                temp_wav_path = temp_wav.name

            # Load audio using pydub (supports multiple formats)
            audio = AudioSegment.from_file(input_path)

            # Convert to mono if stereo
            if audio.channels > 1:
                audio = audio.set_channels(1)

            # Export as WAV with proper settings. export() hands back the
            # open output file, so close it explicitly.
            exported = audio.export(
                temp_wav_path,
                format='wav',
                parameters=[
                    '-ar', '16000',  # Set sample rate to 16kHz
                    '-ac', '1'       # Set channels to mono
                ]
            )
            exported.close()

            return temp_wav_path

        except Exception as e:
            # Do not leave a half-written scratch file behind on failure.
            if temp_wav_path is not None:
                self._remove_quietly(temp_wav_path)
            raise gr.Error(f"Error converting audio format: {str(e)}") from e

    def enhance_audio(self, audio_path):
        """
        Process the input audio file and return the enhanced version

        Args:
            audio_path (str): Path to the input audio file

        Returns:
            str: Path to the enhanced audio file

        Raises:
            gr.Error: If no file was supplied, or conversion/enhancement fails.
        """
        if not audio_path:
            raise gr.Error("Error processing audio: no audio file was provided")

        # Conversion reports its own gr.Error; keeping it outside the try
        # below avoids wrapping that message a second time.
        wav_path = self.convert_audio_to_wav(audio_path)

        output_path = None
        try:
            # Separate and enhance the audio
            est_sources = self.model.separate_file(path=wav_path)

            # Generate a per-request output filename
            output_path = self._new_output_path()

            # Save the first estimated source as the enhanced audio
            torchaudio.save(
                output_path,
                est_sources[:, :, 0].detach().cpu(),
                16000  # Sample rate
            )

            return output_path

        except Exception as e:
            # Drop the reserved output file if nothing usable was written.
            if output_path is not None:
                self._remove_quietly(output_path)
            raise gr.Error(f"Error processing audio: {str(e)}") from e

        finally:
            # Clean up the temporary WAV on success and on failure alike.
            self._remove_quietly(wav_path)
91
 
92
def create_gradio_interface():
    """Build the Gradio UI that exposes ``AudioDenoiser.enhance_audio``.

    Returns:
        gr.Interface: The configured, not yet launched, interface.
    """
    # Constructing the denoiser runs its model setup before any UI is built.
    denoiser = AudioDenoiser()

    # Both widgets exchange audio with the handler as file paths.
    noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
    enhanced_output = gr.Audio(label="Enhanced Audio", type="filepath")

    return gr.Interface(
        fn=denoiser.enhance_audio,
        inputs=noisy_input,
        outputs=enhanced_output,
        title="Audio Denoising using SepFormer",
        description="""
        This application uses the SepFormer model from SpeechBrain to enhance audio quality
        by removing background noise. Supports various audio formats including MP3 and WAV.
        """,
        article="""
        Supported audio formats:
        - MP3
        - WAV
        - OGG
        - FLAC
        - M4A
        and more...

        The audio will automatically be converted to the correct format for processing.
        """
    )
126
 
127
if __name__ == "__main__":
    # Build the UI and serve it with Gradio's default launch settings.
    create_gradio_interface().launch()