import gradio as gr
from faster_whisper import WhisperModel
import logging
from transformers import MarianMTModel, MarianTokenizer
import pandas as pd
import ffmpeg  # ffmpeg-python; needed by add_hard_subtitle_to_video below

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Fetch and parse the ISO language table from the provided URL
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Build (label, value) pairs for the dropdowns: show the language name, pass the ISO 639-1 code
language_options = [(f"{row['Language Name'].strip()} ({row['ISO 639-1']})", row['ISO 639-1'])
                    for _, row in df.iterrows()]
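
# Not every language pair has a Helsinki-NLP/opus-mt model on the Hub, so a translation
# request can fail at from_pretrained. Below is a minimal sketch of an upfront availability
# check; the helper name opus_mt_pair_available is hypothetical (not part of the original
# app) and assumes a missing model repo answers with a non-200 status.
import requests

def opus_mt_pair_available(src, tgt):
    """Return True if an OPUS-MT model repo appears to exist for src -> tgt."""
    url = f"https://huggingface.co/Helsinki-NLP/opus-mt-{src}-{tgt}"
    return requests.head(url, allow_redirects=True, timeout=10).status_code == 200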

def transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript):
    # Transcription
    device = "cpu"  # Use "cuda" for GPU
    compute_type = "int8"  # Use "float16" or "int8" for GPU, "int8" for CPU
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _ = model.transcribe(audio_file)
    transcription = " ".join(segment.text for segment in segments)

    if change_transcript:
        # Return early so the user can edit the transcript before translation
        return transcription, True

    # Translation
    if source_language != target_language:
        model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        # Note: input is truncated at 512 tokens, so very long transcripts are cut short
        translated = model.generate(**tokenizer(transcription, return_tensors="pt", padding=True, truncation=True, max_length=512))
        transcription = tokenizer.decode(translated[0], skip_special_tokens=True)

    return transcription, False
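
# The translation step above truncates anything past 512 tokens, so long videos can lose
# text. Below is a minimal sketch of a chunked alternative; the helper name
# translate_long_text is hypothetical (not part of the original app) and splits on
# sentence boundaries before translating each chunk with the same Marian model.
def translate_long_text(text, tokenizer, model, max_tokens=512):
    """Translate text chunk by chunk to stay under the model's token limit."""
    import re
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks, current = [], ""
    for sentence in sentences:
        candidate = f"{current} {sentence}".strip()
        # Token count includes special tokens; start a new chunk once over budget
        if current and len(tokenizer(candidate).input_ids) > max_tokens:
            chunks.append(current)
            current = sentence
        else:
            current = candidate
    if current:
        chunks.append(current)
    translated = []
    for chunk in chunks:
        batch = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=max_tokens)
        output = model.generate(**batch)
        translated.append(tokenizer.decode(output[0], skip_special_tokens=True))
    return " ".join(translated)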

def add_hard_subtitle_to_video(input_video, transcript):
    """Burn hard subtitles into the video with ffmpeg."""
    temp_subtitle_path = '/tmp/subtitle.srt'
    with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
        file.write(transcript)  # ffmpeg's subtitles filter expects SRT-formatted text here
    output_video_path = "/tmp/output_video.mp4"
    ffmpeg.input(input_video).output(output_video_path, vf=f"subtitles={temp_subtitle_path}").run(quiet=True, overwrite_output=True)
    return output_video_path
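
# The transcript assembled above is plain text, but ffmpeg's subtitles filter expects
# timed SRT cues. Below is a minimal sketch of a converter; the helper name
# segments_to_srt is hypothetical (not part of the original app). faster-whisper
# segments expose .start, .end, and .text, which map directly onto numbered SRT entries.
def segments_to_srt(segments):
    """Render faster-whisper segments as an SRT-formatted string."""
    def fmt(seconds):
        # SRT timestamps use HH:MM:SS,mmm
        ms = int(round(seconds * 1000))
        hours, ms = divmod(ms, 3_600_000)
        minutes, ms = divmod(ms, 60_000)
        secs, ms = divmod(ms, 1_000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{ms:03d}"
    entries = []
    for i, segment in enumerate(segments, start=1):
        entries.append(f"{i}\n{fmt(segment.start)} --> {fmt(segment.end)}\n{segment.text.strip()}\n")
    return "\n".join(entries)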

def process_video(video, source_language, target_language, model_size='base', change_transcript=False, modified_transcript=None):
    audio_file = video  # faster-whisper reads the audio track straight from the video file
    transcript, can_modify = transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript)
    if can_modify and modified_transcript:
        # Use the user-edited transcript; translation is skipped on this path and
        # would need to be applied here, mirroring the step above
        transcript = modified_transcript
    output_video = add_hard_subtitle_to_video(video, transcript)
    return output_video

# Set up the Gradio app
app = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(choices=language_options, label="Source Language"),
        gr.Dropdown(choices=language_options, label="Target Language"),
        gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], label="Model Size"),
        gr.Checkbox(label="Change Transcript before Translation?", value=False),
        gr.TextArea(label="Modified Transcript (if allowed)")
    ],
    outputs=gr.Video(label="Subtitled Video"),  # process_video returns a video path, not text
    title="Video Transcription and Translation Tool",
    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles."
)

if __name__ == "__main__":
    app.launch()