lyimo committed on
Commit
a097442
1 Parent(s): 3d8b9ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -124
app.py CHANGED
@@ -1,130 +1,36 @@
1
  import gradio as gr
2
- import torchaudio
3
  import torch
4
- import os
5
- from pydub import AudioSegment
6
- import tempfile
7
- from speechbrain.pretrained.separation import SepformerSeparation
8
 
9
class AudioDenoiser:
    """Enhance noisy speech with SpeechBrain's pretrained SepFormer model.

    The model is loaded once at construction time from the
    ``speechbrain/sepformer-dns4-16k-enhancement`` source, and the
    ``enhanced_audio`` output directory is created if it is missing.
    """

    def __init__(self):
        # Initialize the SepFormer model for audio enhancement
        self.model = SepformerSeparation.from_hparams(
            source="speechbrain/sepformer-dns4-16k-enhancement",
            savedir='pretrained_models/sepformer-dns4-16k-enhancement'
        )

        # Create output directory if it doesn't exist
        os.makedirs("enhanced_audio", exist_ok=True)

    def convert_audio_to_wav(self, input_path):
        """
        Convert any audio format to WAV with proper settings

        Args:
            input_path (str): Path to input audio file

        Returns:
            str: Path to converted WAV file. The caller owns this temporary
                file and is responsible for deleting it.

        Raises:
            gr.Error: If the file cannot be read or converted.
        """
        temp_wav_path = None
        try:
            # Reserve a unique path, then close the handle immediately so the
            # descriptor is not leaked; the export below reopens it by name.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                temp_wav_path = temp_wav.name

            # Load audio using pydub (supports multiple formats)
            audio = AudioSegment.from_file(input_path)

            # Convert to mono if stereo
            if audio.channels > 1:
                audio = audio.set_channels(1)

            # Export as WAV with proper settings
            audio.export(
                temp_wav_path,
                format='wav',
                parameters=[
                    '-ar', '16000',  # Set sample rate to 16kHz
                    '-ac', '1'       # Set channels to mono
                ]
            )

            return temp_wav_path

        except Exception as e:
            # A failed conversion must not leave its temp file behind.
            if temp_wav_path is not None and os.path.exists(temp_wav_path):
                os.unlink(temp_wav_path)
            raise gr.Error(f"Error converting audio format: {str(e)}") from e

    def enhance_audio(self, audio_path):
        """
        Process the input audio file and return the enhanced version

        Args:
            audio_path (str): Path to the input audio file

        Returns:
            str: Path to the enhanced audio file

        Raises:
            gr.Error: If conversion, enhancement, or saving fails.
        """
        wav_path = None
        try:
            # Convert input audio to proper WAV format
            wav_path = self.convert_audio_to_wav(audio_path)

            # Separate and enhance the audio
            est_sources = self.model.separate_file(path=wav_path)

            # Generate output filename
            # NOTE(review): every request writes the same file, so overlapping
            # requests would overwrite each other's result -- confirm whether
            # the app is ever served with concurrency > 1.
            output_path = os.path.join("enhanced_audio", "enhanced_audio.wav")

            # Save the enhanced audio (first entry along the last axis of the
            # model output)
            torchaudio.save(
                output_path,
                est_sources[:, :, 0].detach().cpu(),
                16000  # Sample rate
            )

            return output_path

        except gr.Error:
            # Already a user-facing error (e.g. from convert_audio_to_wav);
            # re-raise as-is instead of wrapping its message a second time.
            raise
        except Exception as e:
            raise gr.Error(f"Error processing audio: {str(e)}") from e
        finally:
            # Clean up temporary file on success and on failure alike
            if wav_path is not None and os.path.exists(wav_path):
                os.unlink(wav_path)
91
 
92
def create_gradio_interface():
    """Build the Gradio UI around a freshly constructed AudioDenoiser.

    Returns:
        gr.Interface: the configured interface; launching it is left to the
        caller.
    """
    # Constructing the denoiser loads the model, so do it before any UI setup.
    enhancer = AudioDenoiser()

    # Input and output both travel as file paths.
    noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
    enhanced_output = gr.Audio(label="Enhanced Audio", type="filepath")

    summary_text = """
        This application uses the SepFormer model from SpeechBrain to enhance audio quality
        by removing background noise. Supports various audio formats including MP3 and WAV.
        """
    formats_text = """
        Supported audio formats:
        - MP3
        - WAV
        - OGG
        - FLAC
        - M4A
        and more...

        The audio will automatically be converted to the correct format for processing.
        """

    return gr.Interface(
        fn=enhancer.enhance_audio,
        inputs=noisy_input,
        outputs=enhanced_output,
        title="Audio Denoising using SepFormer",
        description=summary_text,
        article=formats_text,
    )
126
 
127
if __name__ == "__main__":
    # Script entry point: build the interface and start serving it.
    create_gradio_interface().launch()
 
1
  import gradio as gr
 
2
  import torch
3
+ import torchaudio
4
+ from speechbrain.inference.enhancement import SpectralMaskEnhancement
 
 
5
 
6
# Load the enhancement model
# The "sepformer-dns4-16k-enhancement" checkpoint is a SepFormer model, so it is
# loaded through SepformerSeparation (as the previous revision of this file did)
# rather than through the spectral-mask enhancement interface.
from speechbrain.inference.separation import SepformerSeparation

enhance_model = SepformerSeparation.from_hparams(
    source="speechbrain/sepformer-dns4-16k-enhancement",
    savedir='pretrained_models/sepformer-dns4-16k-enhancement'
)
# Keep the name this revision originally assigned to, in case anything uses it.
model = enhance_model


# Define the enhancement function
def enhance_audio(noisy_audio):
    """Denoise an audio file and return the path of the enhanced WAV.

    Args:
        noisy_audio (str): Path to the noisy input audio, as supplied by a
            ``gr.Audio(type="filepath")`` component.

    Returns:
        str: Path to a newly created WAV file holding the enhanced audio.

    Raises:
        gr.Error: If no audio was provided.
    """
    import tempfile

    # Gradio passes None when the user submits without uploading anything.
    if noisy_audio is None:
        raise gr.Error("No audio provided. Please upload an audio file.")

    # Enhance the audio
    est_sources = enhance_model.separate_file(path=noisy_audio)

    # Save enhanced audio to a temporary file. A unique path per call keeps
    # overlapping requests from overwriting each other's output; the handle is
    # closed right away so torchaudio can write to the path by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        enhanced_path = tmp_file.name

    # Write out the first entry along the last axis of the model output at 16 kHz.
    torchaudio.save(enhanced_path, est_sources[:, :, 0].detach().cpu(), 16000)

    return enhanced_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
# Create the Gradio interface
# Both components exchange audio as file paths, matching enhance_audio, which
# takes a path and returns a path.
_noisy_input = gr.Audio(type="filepath", label="Upload Noisy Audio")
_enhanced_output = gr.Audio(type="filepath", label="Enhanced Audio")

interface = gr.Interface(
    fn=enhance_audio,
    inputs=_noisy_input,
    outputs=_enhanced_output,
    title="Speech Enhancement App",
    description="Upload a noisy audio file to enhance the quality. The enhanced audio can be downloaded after processing.",
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
# Launch the Gradio app only when this file is run as a script, so importing
# the module (for example from a test) does not start a web server.
if __name__ == "__main__":
    interface.launch()