# Spaces:
# Runtime error
# Runtime error
import contextlib
import os
import tempfile

import gradio as gr
import torch
import torchaudio
from pydub import AudioSegment
from speechbrain.pretrained.separation import SepformerSeparation
class AudioDenoiser:
    """Noise-reduction helper built around a pretrained SepFormer model.

    Creating an instance loads the ``speechbrain/sepformer-dns4-16k-enhancement``
    checkpoint and makes sure the ``enhanced_audio`` output directory exists.
    """

    def __init__(self):
        """Load the enhancement model and prepare the output directory."""
        # Initialize the SepFormer model for audio enhancement
        self.model = SepformerSeparation.from_hparams(
            source="speechbrain/sepformer-dns4-16k-enhancement",
            savedir='pretrained_models/sepformer-dns4-16k-enhancement'
        )
        # Create output directory if it doesn't exist
        os.makedirs("enhanced_audio", exist_ok=True)

    @staticmethod
    def _discard_file(path):
        """
        Best-effort removal of a scratch file.

        Args:
            path (str | None): File to delete; ``None``/empty is a no-op.
        """
        if not path:
            return
        # Cleanup must never mask the error that triggered it, and the file
        # may already be gone, so OS-level failures are deliberately ignored.
        with contextlib.suppress(OSError):
            os.unlink(path)

    def convert_audio_to_wav(self, input_path):
        """
        Convert any audio format to a mono 16 kHz WAV file.

        Args:
            input_path (str): Path to input audio file

        Returns:
            str: Path to converted WAV file (the caller must delete it)

        Raises:
            gr.Error: If the file cannot be decoded or exported.
        """
        temp_wav_path = None
        try:
            # Only the unique name is needed; closing the handle right away
            # avoids leaking a file descriptor on every request.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                temp_wav_path = temp_wav.name

            # Load audio using pydub (supports multiple formats)
            audio = AudioSegment.from_file(input_path)

            # Convert to mono if stereo
            if audio.channels > 1:
                audio = audio.set_channels(1)

            # Export as WAV with proper settings
            exported = audio.export(
                temp_wav_path,
                format='wav',
                parameters=[
                    '-ar', '16000',  # Set sample rate to 16kHz
                    '-ac', '1',      # Set channels to mono
                ]
            )
            # pydub hands back the open output file object; close it so the
            # descriptor is released before the model reads the file.
            exported.close()
            return temp_wav_path
        except Exception as e:
            # Don't leave a half-written scratch file behind on failure.
            self._discard_file(temp_wav_path)
            raise gr.Error(f"Error converting audio format: {str(e)}") from e

    def enhance_audio(self, audio_path):
        """
        Process the input audio file and return the enhanced version.

        Args:
            audio_path (str): Path to the input audio file

        Returns:
            str: Path to the enhanced audio file

        Raises:
            gr.Error: If no file was supplied, or conversion/enhancement fails.
        """
        # Submitting the form without an upload yields no path at all; report
        # that directly instead of letting the decoder fail on it.
        if not audio_path:
            raise gr.Error("Error processing audio: no audio file was provided")

        wav_path = None
        try:
            # Convert input audio to proper WAV format
            wav_path = self.convert_audio_to_wav(audio_path)

            # Separate and enhance the audio
            est_sources = self.model.separate_file(path=wav_path)

            # Generate output filename
            # NOTE(review): the name is fixed, so concurrent requests would
            # overwrite each other's result -- confirm the app runs with a
            # single worker or switch to per-request names.
            output_path = os.path.join("enhanced_audio", "enhanced_audio.wav")

            # Save the enhanced audio
            # presumably est_sources is (batch, time, sources); index 0 picks
            # the single enhanced source -- TODO confirm against SpeechBrain.
            torchaudio.save(
                output_path,
                est_sources[:, :, 0].detach().cpu(),
                16000  # Sample rate
            )
            return output_path
        except gr.Error:
            # Already a user-facing message (e.g. from the conversion step);
            # re-wrapping would only stack a second prefix on top of it.
            raise
        except Exception as e:
            raise gr.Error(f"Error processing audio: {str(e)}") from e
        finally:
            # Clean up temporary file on success *and* failure
            self._discard_file(wav_path)
def create_gradio_interface():
    """Build the Gradio UI that wires an audio upload to the denoiser.

    Returns:
        gr.Interface: Interface whose handler is ``AudioDenoiser.enhance_audio``;
        both the input and the output audio components exchange file paths.
    """
    # Loading the model happens here, once, when the interface is built.
    enhancer = AudioDenoiser()

    noisy_input = gr.Audio(
        type="filepath",
        label="Upload Noisy Audio"
    )
    enhanced_output = gr.Audio(
        label="Enhanced Audio",
        type="filepath"
    )

    # Page copy shown around the widgets.
    description_text = (
        "\n"
        "This application uses the SepFormer model from SpeechBrain to enhance audio quality\n"
        "by removing background noise. Supports various audio formats including MP3 and WAV.\n"
    )
    article_text = (
        "\n"
        "Supported audio formats:\n"
        "- MP3\n"
        "- WAV\n"
        "- OGG\n"
        "- FLAC\n"
        "- M4A\n"
        "and more...\n"
        "The audio will automatically be converted to the correct format for processing.\n"
    )

    return gr.Interface(
        fn=enhancer.enhance_audio,
        inputs=noisy_input,
        outputs=enhanced_output,
        title="Audio Denoising using SepFormer",
        description=description_text,
        article=article_text
    )
if __name__ == "__main__":
    # Script entry point: build the UI, then start serving it.
    # The module-level name ``demo`` is kept as in the original.
    demo = create_gradio_interface()
    demo.launch()