import torch
import gradio as gr
import torchaudio
from transformers import AutoModel
import spaces
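
# Load the VoiceRestore checkpoint from the current directory (the Space repo root);
# trust_remote_code=True lets Transformers run the custom modeling code shipped with it.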
checkpoint_path = "./"
model = AutoModel.from_pretrained(checkpoint_path, trust_remote_code=True)
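

# spaces.GPU() requests a GPU from Hugging Face ZeroGPU Spaces for the duration of
# each call to the decorated function.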
@spaces.GPU()
def restore_audio(input_audio):
    # Load the audio file
    waveform, sample_rate = torchaudio.load(input_audio)

    # Calculate the duration of the audio (in seconds)
    duration = waveform.shape[1] / sample_rate

    # Output file path
    output_path = "restored_output.wav"

    if duration > 10:
        model(input_audio, output_path, short=False)
    else:
        model(input_audio, output_path)  # short=True by default

    return output_path
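

# Build the Gradio UI: example inputs, an upload widget, the restored-audio player, and a restore button.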
with gr.Blocks() as demo:
gr.Markdown("# πŸ”Š Voice Restoration with Transformer-based Model")
gr.Markdown(
"""
Upload a degraded audio file or select an example, and the space will restore it using the **VoiceRestore** model!
Based on this [repo](https://github.com/skirdey/voicerestore) by [@Stan Kirdey](https://github.com/skirdey),
and the HF Transformers πŸ€— [Model](https://huggingface.co/jadechoghari/VoiceRestore) by [@jadechoghari](https://github.com/jadechoghari).
The model returns optimized results for audio less than 10 seconds, however, it supports unlimited duration!
"""
)

    # The output player is created up front (render=False) so the cached examples can use it
    # as their output; it is placed into the right-hand column below via .render().
    output_audio = gr.Audio(label="Restored Audio", type="filepath", render=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎧 Select an Example or Upload Your Audio:")
            input_audio = gr.Audio(label="Upload Degraded Audio", type="filepath")
            gr.Examples(
                examples=["example_input.wav", "example_16khz.wav", "example-distort-16khz.wav", "example-full-degrad.wav", "example-reverb-16khz.wav"],
                inputs=input_audio,
                outputs=output_audio,
                fn=restore_audio,
                label="Sample Degraded Audios",
                cache_examples="lazy",  # each example is restored once, on first selection, then cached
            )

        with gr.Column():
            gr.Markdown("### 🎶 Restored Audio Output:")
            output_audio.render()
    with gr.Row():
        restore_btn = gr.Button("✨ Restore Audio")

    # Connect the button to the function
    restore_btn.click(restore_audio, inputs=input_audio, outputs=output_audio)

# Launch the demo
demo.launch(debug=True)