Lenylvt committed on
Commit
4f0841f
1 Parent(s): 42666b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -42
app.py CHANGED
@@ -1,50 +1,97 @@
1
  import gradio as gr
 
 
2
  import os
3
- import ffmpeg
4
- import pysrt
 
5
  import pandas as pd
 
6
  import requests
7
- import io
8
- from transformers import MarianMTModel, MarianTokenizer
9
- from gradio_client import Client
10
-
11
- # Initialize Gradio Client for Whisper JAX
12
- client = Client(src="sanchit-gandhi/whisper-jax")
13
-
14
- def fetch_languages(url):
15
- response = requests.get(url)
16
- if response.status_code == 200:
17
- csv_content = response.content.decode('utf-8')
18
- df = pd.read_csv(io.StringIO(csv_content), delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
19
- df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
20
- df['ISO 639-1'] = df['ISO 639-1'].str.strip()
21
- language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']} - {row['Language Name']}") for index, row in df.iterrows()]
22
- return language_options
23
- else:
24
- return []
25
-
26
- def transcript_audio(audio_file, task, return_timestamps, api_name="/predict_1"):
27
- prediction = client.predict(audio_file=audio_file, task=task, return_timestamps=return_timestamps, api_name=api_name)
28
- return prediction['transcription'], prediction['transcription_time_s']
29
-
30
- def process_video(input_video, video_language, target_language):
31
- transcription, _ = transcript_audio(input_video, "transcribe", True)
32
- srt_path = text_to_srt(transcription)
33
- translated_srt_path = translate_srt(srt_path, video_language, target_language)
34
- output_video = add_subtitle_to_video(input_video, translated_srt_path)
35
- return output_video
36
 
37
- language_url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
38
- video_language_options = fetch_languages(language_url)
 
 
 
 
 
 
 
39
 
40
- with gr.Blocks() as app:
41
- with gr.Row():
42
- input_video = gr.Video(label="Video File")
43
- video_language = gr.Dropdown(choices=video_language_options, label="Language of the Video")
44
- target_language = gr.Dropdown(choices=video_language_options, label="Language Translated")
45
- process_btn = gr.Button("Process Video")
46
- output_video = gr.Video(label="Video with Translated Subtitles")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- process_btn.click(fn=process_video, inputs=[input_video, video_language, target_language], outputs=output_video)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- app.launch()
 
 
1
  import gradio as gr
2
+ from faster_whisper import WhisperModel
3
+ import logging
4
  import os
5
+ from moviepy.editor import VideoFileClip
6
+ import ffmpeg # Make sure to install ffmpeg-python
7
+ from transformers import MarianMTModel, MarianTokenizer
8
  import pandas as pd
9
+ import pysrt
10
  import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Fetch and parse language options from the provided URL.
# The file is read as a "|"-delimited table: the first two rows are skipped
# (presumably the Markdown header and separator rows -- confirm against the
# remote file) and columns that are entirely empty are dropped.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Prepare language options for the dropdown.
# Gradio interprets a tuple choice as (display name, value): the human-readable
# label must come first and the ISO 639-1 code second, so that the callback
# receives the bare code (e.g. "en") that Whisper and the MarianMT model names
# expect.
language_options = [
    (f"{name} ({code})", code)
    for code, name in zip(df['ISO 639-1'], df['Language Name'])
]
24
+
25
def format_timestamp(seconds):
    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

    The value is rounded to whole milliseconds *before* being split into
    fields, so a remainder such as 59.9996 s carries into the minutes field
    instead of being rendered as the invalid ``60.000``.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The zero-padded timestamp, e.g. ``"01:01:01.500"`` for ``3661.5``.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"
31
+
32
def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    The audio is written as 16-bit PCM, mono, 16 kHz.

    Args:
        video_path: Path of the input video file.

    Returns:
        The path of the written WAV file (always ``/tmp/audio.wav``).

    Raises:
        ffmpeg.Error: If the ffmpeg process exits with a non-zero status.
    """
    output_audio_path = '/tmp/audio.wav'
    (
        ffmpeg
        .input(video_path)
        .output(output_audio_path, acodec='pcm_s16le', ac=1, ar='16k')
        # The output path is fixed, so the file already exists from the second
        # call onwards; without overwrite_output ffmpeg refuses to replace it
        # and the run fails.
        .run(quiet=True, overwrite_output=True)
    )
    return output_audio_path
37
+
38
def _translate_texts(texts, source_language, target_language, batch_size=16):
    """Translate a list of strings with the matching Helsinki-NLP MarianMT model.

    The texts are translated in small batches so that each subtitle line is
    handled on its own (no truncation of a single huge input) and memory use
    stays bounded on long videos.
    """
    model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    translator = MarianMTModel.from_pretrained(model_name)

    translated = []
    for offset in range(0, len(texts), batch_size):
        batch = texts[offset:offset + batch_size]
        encoded = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
        generated = translator.generate(**encoded)
        translated.extend(tokenizer.batch_decode(generated, skip_special_tokens=True))
    return translated


def transcribe_and_optionally_translate(video_file, source_language, target_language, model_size, allow_modification):
    """Transcribe a video and, if requested, translate the result.

    The audio is extracted with ``extract_audio``, transcribed on CPU with
    faster-whisper (int8), and each non-empty segment is optionally translated
    with a MarianMT model. The result is rendered as SRT text so it can be
    written directly to a subtitle file.

    Args:
        video_file: Path of the input video.
        source_language: ISO 639-1 code of the spoken language; a falsy value
            lets Whisper detect the language.
        target_language: ISO 639-1 code to translate into; translation is
            skipped when it is falsy or equal to the source language.
        model_size: Whisper model size name; a falsy value falls back to
            ``"base"``.
        allow_modification: Passed through unchanged as the second return value.

    Returns:
        A ``(srt_text, allow_modification)`` tuple. ``srt_text`` is an empty
        string when no speech segments were produced.
    """
    audio_file = extract_audio(video_file)

    # Transcription
    whisper_model = WhisperModel(model_size or "base", device="cpu", compute_type="int8")
    # faster-whisper names this keyword `language`; None enables auto-detection.
    segment_iter, info = whisper_model.transcribe(audio_file, language=source_language or None)
    # `segment_iter` is lazy: the actual decoding happens while iterating here.
    segments = [segment for segment in segment_iter if segment.text.strip()]
    texts = [segment.text.strip() for segment in segments]

    # Translation
    spoken_language = source_language or info.language
    if texts and target_language and target_language != spoken_language:
        texts = _translate_texts(texts, spoken_language, target_language)

    # Render as SRT: index line, "start --> end" line, text, blank separator.
    # SRT uses a comma as the millisecond separator.
    entries = []
    for index, (segment, text) in enumerate(zip(segments, texts), start=1):
        start = format_timestamp(segment.start).replace(".", ",")
        end = format_timestamp(segment.end).replace(".", ",")
        entries.append(f"{index}\n{start} --> {end}\n{text}\n")
    transcription = "\n".join(entries)

    return transcription, allow_modification
57
+
58
def add_hard_subtitle_to_video(input_video, transcript):
    """Burn subtitles into a video and return the path of the new file.

    Args:
        input_video: Path of the source video.
        transcript: Subtitle text; it is written verbatim to an ``.srt`` file,
            so it must already be in SRT format.

    Returns:
        The path of the rendered video (always ``/tmp/output_video.mp4``).

    Raises:
        ffmpeg.Error: If the ffmpeg process exits with a non-zero status.
    """
    temp_subtitle_path = '/tmp/subtitle.srt'
    with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
        file.write(transcript)

    output_video_path = "/tmp/output_video.mp4"
    (
        ffmpeg
        .input(input_video)
        .output(output_video_path, vf=f"subtitles={temp_subtitle_path}")
        # The output path is fixed; allow ffmpeg to replace the file left by a
        # previous run instead of failing because it already exists.
        .run(quiet=True, overwrite_output=True)
    )

    return output_video_path
68
+
69
+ # Gradio Interface
70
def process_video(video, source_language, target_language, model_size='base', allow_modification=False, modified_transcript=None):
    """Gradio callback: subtitle a video with a generated or user-supplied transcript.

    Args:
        video: Path of the uploaded video.
        source_language: Language code of the speech in the video.
        target_language: Language code the subtitles should be in.
        model_size: Whisper model size used for transcription.
        allow_modification: Whether a user-supplied transcript may be used.
        modified_transcript: Replacement subtitle text. It is written verbatim
            to the subtitle file, so it must be in SRT format.

    Returns:
        The path of the video with hard subtitles.
    """
    if allow_modification and modified_transcript:
        # The user-supplied transcript replaces the generated one entirely, so
        # skip the expensive transcription/translation pass altogether.
        transcript = modified_transcript
    else:
        transcript, _ = transcribe_and_optionally_translate(
            video, source_language, target_language, model_size, allow_modification
        )

    # Add hard subtitles to the video
    return add_hard_subtitle_to_video(video, transcript)
79
 
80
# Setup the Gradio app.
# The inputs are passed positionally to process_video, so their order must
# match its parameter order.
app = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(choices=language_options, label="Source Language"),
        gr.Dropdown(choices=language_options, label="Target Language"),
        # An explicit initial value is required: an untouched dropdown would
        # otherwise submit None, which overrides process_video's 'base' default.
        gr.Dropdown(
            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
            value="base",
            label="Model Size",
        ),
        gr.Checkbox(label="Allow Transcript Modification?", value=False),
        gr.TextArea(label="Modified Transcript (if allowed)"),
    ],
    outputs=gr.Video(label="Processed Video with Hard Subtitles"),
    title="Video Transcription and Translation Tool",
    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles.",
)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()