import logging

import ffmpeg  # ffmpeg-python, used to burn subtitles into the video
import gradio as gr
import pandas as pd
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Fetch and parse the language table (a markdown table of ISO codes)
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()
df['Language Name'] = df['Language Name'].str.strip()

# Dropdown choices as (label, value) pairs: show the language name, pass the ISO code
language_options = [(f"{row['Language Name']} ({row['ISO 639-1']})", row['ISO 639-1'])
                    for _, row in df.iterrows()]

def transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript):
    # Transcription
    device = "cpu"         # Use "cuda" for GPU
    compute_type = "int8"  # Use "float16" or "int8" for GPU, "int8" for CPU
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _ = model.transcribe(audio_file)
    transcription = " ".join(segment.text for segment in segments)

    if change_transcript:
        # The user will modify the transcript manually before translation
        return transcription, True

    # Translation
    if source_language != target_language:
        model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        translated = model.generate(**tokenizer(transcription, return_tensors="pt", padding=True, truncation=True, max_length=512))
        transcription = tokenizer.decode(translated[0], skip_special_tokens=True)

    return transcription, False
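
# NOTE: add_hard_subtitle_to_video below assumes an SRT-formatted transcript,
# while transcribe_and_optionally_translate returns plain text. The helper
# below is a minimal sketch of that conversion, assuming faster-whisper
# segments with .start, .end, and .text attributes; it is a hypothetical
# addition, not wired into the pipeline.
def segments_to_srt(segments):
    """Format faster-whisper segments as an SRT string (sketch)."""
    def fmt(seconds):
        # SRT timestamps look like HH:MM:SS,mmm
        ms = int(round(seconds * 1000))
        h, ms = divmod(ms, 3_600_000)
        m, ms = divmod(ms, 60_000)
        s, ms = divmod(ms, 1_000)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
    entries = []
    for i, segment in enumerate(segments, start=1):
        entries.append(f"{i}\n{fmt(segment.start)} --> {fmt(segment.end)}\n{segment.text.strip()}\n")
    return "\n".join(entries)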

def add_hard_subtitle_to_video(input_video, transcript):
    """Burn hard subtitles into a video with ffmpeg."""
    temp_subtitle_path = '/tmp/subtitle.srt'
    with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
        file.write(transcript)  # Assumes the transcript is already in SRT format
    output_video_path = "/tmp/output_video.mp4"
    # overwrite_output lets repeat runs replace the previous output file
    ffmpeg.input(input_video).output(output_video_path, vf=f"subtitles={temp_subtitle_path}").run(quiet=True, overwrite_output=True)
    return output_video_path

def process_video(video, source_language, target_language, model_size='base', change_transcript=False, modified_transcript=None):
    audio_file = video  # faster-whisper decodes the audio track from the video file directly
    transcript, can_modify = transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript)
    if can_modify and modified_transcript:
        # Use the modified transcript if editing was requested and text was provided
        transcript = modified_transcript
        # Perform translation here if necessary (similar to the previous step)
    output_video = add_hard_subtitle_to_video(video, transcript)
    return output_video
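
# The comment in process_video leaves the post-edit translation step
# unimplemented. The sketch below factors out the MarianMT logic already used
# in transcribe_and_optionally_translate; it assumes an opus-mt checkpoint
# exists for the requested language pair and could be called on the edited
# transcript before subtitling.
def translate_text(text, source_language, target_language):
    """Translate text with a Helsinki-NLP/opus-mt MarianMT model (sketch)."""
    model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**batch)
    return tokenizer.decode(translated[0], skip_special_tokens=True)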

# Set up the Gradio app
app = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(choices=language_options, label="Source Language"),
        gr.Dropdown(choices=language_options, label="Target Language"),
        gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], value="base", label="Model Size"),
        gr.Checkbox(label="Change Transcript before Translation?", value=False),
        gr.TextArea(label="Modified Transcript (if allowed)")
    ],
    outputs=gr.Video(label="Subtitled Video"),  # process_video returns the path to the subtitled video
    title="Video Transcription and Translation Tool",
    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles."
)

if __name__ == "__main__":
    app.launch()