Lenylvt committed on
Commit
effdcb4
1 Parent(s): dbc58d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -65
app.py CHANGED
@@ -1,86 +1,134 @@
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
  import logging
4
- from transformers import MarianMTModel, MarianTokenizer
 
5
  import pandas as pd
6
- import requests
7
  import ffmpeg
8
 
9
- # Configure logging for debugging purposes
10
- logging.basicConfig()
11
- logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
12
-
13
- # Fetch and parse language options from the provided URL
14
  url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
15
  df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
16
  df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
17
  df['ISO 639-1'] = df['ISO 639-1'].str.strip()
18
 
19
- # Prepare language options for the dropdown
20
  language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']}") for index, row in df.iterrows()]
21
 
22
- def transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript):
23
- # Transcription
24
- device = "cpu" # Use "cuda" for GPU
25
- compute_type = "int8" # Use "float16" or "int8" for GPU, "int8" for CPU
26
- model = WhisperModel(model_size, device=device, compute_type=compute_type)
27
- segments, _ = model.transcribe(audio_file)
28
- transcription = " ".join([segment.text for segment in segments])
29
-
30
- if change_transcript:
31
- # Assume user will modify the transcript manually before translation
32
- return transcription, True
33
-
34
- # Translation
35
- if source_language != target_language:
36
- model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  tokenizer = MarianTokenizer.from_pretrained(model_name)
38
  model = MarianMTModel.from_pretrained(model_name)
39
- translated = model.generate(**tokenizer(transcription, return_tensors="pt", padding=True, truncation=True, max_length=512))
40
- transcription = tokenizer.decode(translated[0], skip_special_tokens=True)
41
-
42
- return transcription, False
 
43
 
44
- def add_hard_subtitle_to_video(input_video, transcript):
45
- """Add hard subtitles to video."""
46
- temp_subtitle_path = '/tmp/subtitle.srt'
47
- with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
48
- file.write(transcript) # Assuming transcript is in SRT format
49
-
50
- output_video_path = f"/tmp/output_video.mp4"
51
- ffmpeg.input(input_video).output(output_video_path, vf=f"subtitles={temp_subtitle_path}").run(quiet=True)
52
-
53
- return output_video_path
 
 
 
54
 
55
- def process_video(video, source_language, target_language, model_size='base', change_transcript=False, modified_transcript=None):
56
- audio_file = video # Directly use the video file as the audio input
57
-
58
- transcript, can_modify = transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript)
59
-
60
- if can_modify and modified_transcript:
61
- # Use the modified transcript for translation if allowed and provided
62
- transcript = modified_transcript
63
- # Perform translation here if necessary (similar to the previous step)
64
-
65
- output_video = add_hard_subtitle_to_video(video, transcript)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  return output_video
67
 
68
- # Setup the Gradio app
69
- app = gr.Interface(
70
- fn=process_video,
71
- inputs=[
72
- gr.Video(label="Upload Video"),
73
- gr.Dropdown(choices=language_options, label="Source Language"),
74
- gr.Dropdown(choices=language_options, label="Target Language"),
75
- gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], label="Model Size"),
76
- gr.Checkbox(label="Change Transcript before Translation?", value=False),
77
- gr.TextArea(label="Modified Transcript (if allowed)")
78
- ],
79
- outputs=gr.Text(label="Transcript"),
80
- title="Video Transcription and Translation Tool",
81
- description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles."
82
- )
83
 
84
- if __name__ == "__main__":
85
- app.launch()
 
 
 
 
 
86
 
 
 
 
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
  import logging
4
+ import os
5
+ import pysrt
6
  import pandas as pd
7
+ from transformers import MarianMTModel, MarianTokenizer
8
  import ffmpeg
9
 
10
# Initial configuration and data loading.
# The language table is fetched from the URL below and parsed as "|"-delimited
# text; the first two rows are skipped (presumably the Markdown header and
# separator rows) and columns that are entirely empty are dropped.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None)
df = df.dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown choices: one (code, code) pair per row of the stripped ISO 639-1 column.
language_options = [(code, f"{code}") for code in df['ISO 639-1']]

logging.basicConfig(level=logging.DEBUG)
19
+
20
# Convert a timestamped transcript into an SRT file.
def text_to_srt(text):
    """Write ``text`` to ``/tmp/output.srt`` in SRT format and return that path.

    Each non-blank line of ``text`` is expected to have the shape
    ``[start -> end] caption``. A timestamp containing a single ``:``
    gets an ``00:`` prefix, and every ``.`` in a timestamp is replaced by
    ``,``. Blank lines and lines that do not match the expected shape are
    skipped.

    Cues are numbered consecutively from 1 in output order, so skipped lines
    no longer leave gaps in the numbering.

    Args:
        text: Transcript with one ``[start -> end] caption`` entry per line.

    Returns:
        The path of the written SRT file (always ``/tmp/output.srt``).
    """
    cues = []
    for line in text.split('\n'):
        if line.strip() == "":
            continue
        try:
            # Both unpackings raise ValueError when the line is malformed.
            times, content = line.split(']', 1)
            start, end = times[1:].split(' -> ')
        except ValueError:
            continue
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        cue_number = len(cues) + 1
        cues.append(
            f"{cue_number}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{content.strip()}\n\n"
        )
    temp_file_path = '/tmp/output.srt'
    with open(temp_file_path, 'w', encoding='utf-8') as file:
        file.write("".join(cues))
    return temp_file_path
41
+
42
# Format a number of seconds as an HH:MM:SS.mmm timestamp.
def format_timestamp(seconds):
    """Return ``seconds`` formatted as ``HH:MM:SS.mmm``.

    The value is first rounded to whole milliseconds and then split into
    fields, so rounding carries into the minutes/hours fields instead of
    producing an invalid ``60.000`` seconds field (e.g. 59.9996 becomes
    ``00:01:00.000``).

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"
48
+
49
# Loaded (tokenizer, model) pairs keyed by model name, so that translating many
# subtitles with the same language pair loads the model only once.
_MARIAN_MODELS = {}


# Translate a piece of text.
def translate_text(text, source_language_code, target_language_code):
    """Translate ``text`` with the ``Helsinki-NLP/opus-mt-<src>-<tgt>`` model.

    Args:
        text: Text to translate.
        source_language_code: Language code used as ``<src>`` in the model name.
        target_language_code: Language code used as ``<tgt>`` in the model name.

    Returns:
        The translated text. ``text`` is returned unchanged when source and
        target codes are equal or when ``text`` is empty/whitespace, so no
        error sentence ends up in the subtitles. If the model cannot be
        loaded, a ``"Failed to load model ..."`` message is returned instead
        of raising.
    """
    if source_language_code == target_language_code or not text.strip():
        return text

    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    if model_name not in _MARIAN_MODELS:
        try:
            tokenizer = MarianTokenizer.from_pretrained(model_name)
            model = MarianMTModel.from_pretrained(model_name)
        except Exception as e:
            logging.error("Could not load translation model %s: %s", model_name, e)
            return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"
        # Only successful loads are cached; failures are retried on the next call.
        _MARIAN_MODELS[model_name] = (tokenizer, model)

    tokenizer, model = _MARIAN_MODELS[model_name]
    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512))
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text
62
 
63
# Translate an SRT file.
def translate_srt(input_file_path, source_language_code, target_language_code, progress=None):
    """Translate every subtitle of an SRT file and save the result next to it.

    Args:
        input_file_path: Path of the SRT file to translate.
        source_language_code: Language code of the existing subtitles.
        target_language_code: Language code to translate into.
        progress: Optional callable invoked after each subtitle with the
            completed fraction (a float in ``(0, 1]``).

    Returns:
        Path of the translated file: the input path with
        ``_<target_language_code>`` inserted before the extension.
    """
    subs = pysrt.open(input_file_path)
    total = len(subs)
    translated_subs = []
    for idx, sub in enumerate(subs, start=1):
        translated_text = translate_text(sub.text, source_language_code, target_language_code)
        translated_subs.append(
            pysrt.SubRipItem(index=idx, start=sub.start, end=sub.end, text=translated_text)
        )
        # Compare against None explicitly so a progress object that is falsy
        # (or whose truthiness is not well defined) is still called.
        if progress is not None:
            progress(idx / total)

    # Split off the real extension instead of str.replace(".srt", ...), which
    # rewrote every ".srt" occurrence in the path and, when none was present,
    # produced the input path itself (overwriting the source file).
    root, ext = os.path.splitext(input_file_path)
    translated_srt_path = f"{root}_{target_language_code}{ext or '.srt'}"
    pysrt.SubRipFile(translated_subs).save(translated_srt_path)
    return translated_srt_path
76
 
77
# Transcribe the audio track of a video into timestamped text.
def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file with faster-whisper on CPU (int8).

    Args:
        audio_file_path: Path of the audio or video file to transcribe.
        model_size: Whisper model size or path passed to ``WhisperModel``.

    Returns:
        One line per segment, formatted as
        ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``, joined with newlines.
    """
    device = "cpu"
    compute_type = "int8"
    # The model identifier is passed positionally: WhisperModel's first
    # parameter is named ``model_size_or_path``, so ``model_size=...`` as a
    # keyword raises TypeError.
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _ = model.transcribe(audio_file_path)
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )
88
+
89
# Add subtitles to a video.
def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle):
    """Mux or burn a subtitle file into a video using ffmpeg.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file to add.
        subtitle_language: Value written to the subtitle stream's
            ``language`` metadata (soft subtitles only).
        soft_subtitle: If true, add the subtitles as a separate ``mov_text``
            stream while copying the other streams; otherwise render them
            into the picture with the ``subtitles`` video filter.

    Returns:
        Path of the output file, ``/tmp/output-<input name>.mp4``. An existing
        file at that path is overwritten.
    """
    video_input_stream = ffmpeg.input(input_video)
    subtitle_input_stream = ffmpeg.input(subtitle_file)
    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    output_video = f"/tmp/output-{input_video_name}.mp4"
    subtitle_track_title = os.path.splitext(os.path.basename(subtitle_file))[0]

    if soft_subtitle:
        stream = ffmpeg.output(
            video_input_stream, subtitle_input_stream, output_video,
            **{
                "c": "copy",
                "c:s": "mov_text",
                # A dict cannot hold the same key twice: the original passed
                # "metadata:s:s:0" for both values, so the language tag was
                # silently dropped. The title uses the equivalent "all subtitle
                # streams" specifier (there is exactly one subtitle stream).
                "metadata:s:s:0": f"language={subtitle_language}",
                "metadata:s:s": f"title={subtitle_track_title}",
            }
        )
    else:
        stream = ffmpeg.output(
            video_input_stream, output_video,
            vf=f"subtitles={subtitle_file}"
        )

    ffmpeg.run(stream, overwrite_output=True)
    return output_video
111
 
112
# Gradio callback: transcribe, translate and subtitle a video.
def transcribe_and_translate_video(video_file, source_language_code, target_language_code):
    """Transcribe a video, translate the subtitles and burn them into the video.

    Args:
        video_file: The uploaded video, either as a path string (what the
            Gradio Video component passes) or as a file-like object exposing
            the path through ``.name``.
        source_language_code: Language code of the spoken audio.
        target_language_code: Language code to translate the subtitles into.

    Returns:
        A ``(output_video_path, translated_srt_path)`` tuple.
    """
    # Accept both a plain path and a file wrapper; a plain string has no
    # ``.name`` attribute, so the original ``video_file.name`` raised
    # AttributeError for path inputs.
    video_path = getattr(video_file, "name", video_file)
    transcription = transcribe(video_path, "tiny")
    srt_path = text_to_srt(transcription)
    translated_srt_path = translate_srt(srt_path, source_language_code, target_language_code)
    output_video = add_subtitle_to_video(video_path, translated_srt_path, target_language_code, False)
    return output_video, translated_srt_path
119
+
120
# Gradio inputs and outputs.
# Components are taken from the top-level ``gr`` namespace; the
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated and no longer exist
# in current Gradio releases.
video_input = gr.Video(label="Video File")
source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")

transcribe_translate_interface = gr.Interface(
    fn=transcribe_and_translate_video,
    inputs=[video_input, source_language_dropdown, target_language_dropdown],
    outputs=[
        gr.Video(label="Video with Translated Subtitles"),
        gr.File(label="Translated Subtitles (.srt)"),
    ],
    title="Video Transcribe & Translate",
    description="Transcribe and translate the subtitles of your video into another language."
)

# Launch the Gradio application.
transcribe_translate_interface.launch()