File size: 6,857 Bytes
3c71f27
9e156fa
4f0841f
 
effdcb4
 
9e156fa
effdcb4
dbc58d4
f537405
9e156fa
effdcb4
4f0841f
 
 
 
9e156fa
8c56203
219d138
4f0841f
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c71f27
effdcb4
 
 
 
 
 
3c71f27
 
effdcb4
 
 
 
 
 
4f0841f
 
effdcb4
 
 
 
 
3c71f27
 
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
3c71f27
 
effdcb4
 
f537405
 
bfe6e49
effdcb4
 
 
 
 
 
3c71f27
 
effdcb4
eb5718c
effdcb4
 
 
eb5718c
effdcb4
eb5718c
effdcb4
eb5718c
effdcb4
4f0841f
9e156fa
b7a9c79
0121f2d
 
 
 
 
 
b7a9c79
 
 
f678d41
 
b7a9c79
 
 
 
 
 
219d138
 
b7a9c79
fc75498
b7a9c79
 
219d138
 
b7a9c79
 
fc75498
b7a9c79
 
219d138
 
eb5718c
 
b7a9c79
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import spaces
import gradio as gr
from faster_whisper import WhisperModel
import logging
import os
import pysrt
import pandas as pd
from transformers import MarianMTModel, MarianTokenizer
import ffmpeg
import torch

# Initial configuration and data loading
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"

# The language table is read as pipe-delimited text: the first two rows are
# skipped and columns that are entirely empty are dropped before naming the
# remaining four columns.
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None)
df = df.dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown choices as (label, value) pairs; both entries are the ISO 639-1 code
language_options = [(iso_code, f"{iso_code}") for iso_code in df['ISO 639-1']]

# Whisper model sizes offered in the UI
model_size_options = [
    "tiny",
    "base",
    "small",
    "medium",
    "large",
    "large-v2",
    "large-v3",
]

logging.basicConfig(level=logging.DEBUG)

# Convert a timestamped transcript into an SRT file
def text_to_srt(text, output_path='/tmp/output.srt'):
    """Write a timestamped transcript to disk in SubRip (SRT) format.

    Each usable input line has the shape ``[start -> end] caption`` where the
    timestamps are ``HH:MM:SS.mmm`` or ``MM:SS.mmm``. Blank lines and lines
    that do not match this shape are skipped. Cues are numbered sequentially
    from 1 in the order they are written, so skipped lines never leave gaps
    in the numbering.

    Args:
        text: Transcript with one cue per line.
        output_path: Destination file. Defaults to ``/tmp/output.srt``, the
            location this function has always written to.

    Returns:
        The path of the written SRT file (``output_path``).
    """
    cues = []
    for raw_line in text.split('\n'):
        # Strip first so leading whitespace cannot end up inside the timestamp
        line = raw_line.strip()
        if not line or not line.startswith('['):
            continue
        try:
            times, content = line.split(']', 1)
            # times[1:] drops the opening '[' checked above
            start, end = times[1:].split(' -> ')
        except ValueError:
            # No closing ']' or not exactly one ' -> ' separator: not a cue
            continue
        # SRT requires an hours field; pad MM:SS.mmm timestamps with "00:"
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        cue_number = len(cues) + 1
        # SRT uses a comma as the millisecond separator
        cues.append(
            f"{cue_number}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{content.strip()}\n\n"
        )
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write("".join(cues))
    return output_path
    
# Format a number of seconds as an HH:MM:SS.mmm timestamp
def format_timestamp(seconds):
    """Return ``seconds`` formatted as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* being split into
    fields. Splitting first and rounding only the seconds field can produce an
    invalid ``60.000`` seconds component (for example for 59.9996).

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The zero-padded timestamp string.
    """
    total_millis = int(round(seconds * 1000))
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
    
# Loaded (tokenizer, model) pairs keyed by model name. Translating a subtitle
# file calls translate_text once per cue; without this cache the weights would
# be reloaded for every single cue. The cache is unbounded, one entry per
# language pair actually used.
_TRANSLATION_MODELS = {}


# Translate a piece of text between two languages
@spaces.GPU
def translate_text(text, source_language_code, target_language_code):
    """Translate ``text`` with a Helsinki-NLP opus-mt MarianMT model.

    Args:
        text: The text to translate. Input is truncated to 512 tokens.
        source_language_code: Language code of ``text``, used to build the
            model name.
        target_language_code: Language code to translate into.

    Returns:
        The translated text. When both language codes are equal, or when the
        model for the pair cannot be loaded, a human-readable message
        describing the problem is returned instead (no exception is raised).
    """
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."

    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    loaded = _TRANSLATION_MODELS.get(model_name)
    if loaded is None:
        try:
            tokenizer = MarianTokenizer.from_pretrained(model_name)
            model = MarianMTModel.from_pretrained(model_name)
        except Exception as e:
            # Callers expect a message string rather than an exception, but
            # the failure should still be visible in the logs.
            logging.exception("Could not load translation model %s", model_name)
            return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"
        loaded = (tokenizer, model)
        _TRANSLATION_MODELS[model_name] = loaded

    tokenizer, model = loaded
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
    
# Translate every cue of an SRT file
@spaces.GPU
def translate_srt(input_file_path, source_language_code, target_language_code, progress=None):
    """Translate an SRT file cue by cue and save the result next to it.

    Timings are copied unchanged from the input; cues are renumbered from 1.
    The output file name is the input name with ``_<target_language_code>``
    inserted before the extension (``out.srt`` -> ``out_fr.srt``).

    Args:
        input_file_path: Path of the SRT file to translate.
        source_language_code: Language code of the existing subtitles.
        target_language_code: Language code to translate into.
        progress: Optional callable invoked after each cue with the fraction
            of cues completed so far.

    Returns:
        The path of the translated SRT file.
    """
    subs = pysrt.open(input_file_path)
    total = len(subs)
    translated_subs = []
    for position, sub in enumerate(subs, start=1):
        translated_text = translate_text(sub.text, source_language_code, target_language_code)
        translated_subs.append(
            pysrt.SubRipItem(index=position, start=sub.start, end=sub.end, text=translated_text)
        )
        if progress:
            progress(position / total)
    # Only touch the file extension: a plain str.replace(".srt", ...) would
    # also rewrite ".srt" inside directory names, and would return the input
    # path itself (overwriting it) when the extension is missing or upper-case.
    root, extension = os.path.splitext(input_file_path)
    translated_srt_path = f"{root}_{target_language_code}{extension}"
    pysrt.SubRipFile(translated_subs).save(translated_srt_path)
    return translated_srt_path
    
# Transcribe the audio of a video file into timestamped text
@spaces.GPU
def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file with faster-whisper.

    Runs on CUDA with float16 when a GPU is available, otherwise on CPU with
    int8.

    Args:
        audio_file_path: Path of the file handed to the Whisper model.
        model_size: Whisper model size name passed to ``WhisperModel``.

    Returns:
        One line per segment, formatted as ``[start -> end] text`` and joined
        with newlines.
    """
    if torch.cuda.is_available():
        device, compute_type = "cuda", "float16"
    else:
        device, compute_type = "cpu", "int8"

    whisper = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _info = whisper.transcribe(audio_file_path)

    lines = []
    for seg in segments:
        begin = format_timestamp(seg.start)
        finish = format_timestamp(seg.end)
        lines.append(f"[{begin} -> {finish}] {seg.text}")
    return "\n".join(lines)
    
# Add subtitles to a video
@spaces.GPU
def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle=False):
    """Produce a subtitled MP4 copy of ``input_video`` under ``/tmp``.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file to add.
        subtitle_language: Accepted for interface compatibility; not used by
            the current implementation.
        soft_subtitle: When true, the subtitle file is muxed as a ``mov_text``
            track and the other streams are copied. Otherwise the subtitles
            are rendered onto the picture with the ``subtitles`` video filter.

    Returns:
        The path of the written video, ``/tmp/<input name>_subtitled.mp4``.
    """
    base_name, _extension = os.path.splitext(os.path.basename(input_video))
    output_video = f"/tmp/{base_name}_subtitled.mp4"

    source = ffmpeg.input(input_video)
    subtitle_source = ffmpeg.input(subtitle_file)

    if not soft_subtitle:
        job = ffmpeg.output(source, output_video, vf=f"subtitles={subtitle_file}")
    else:
        job = ffmpeg.output(source, subtitle_source, output_video, **{"c": "copy", "c:s": "mov_text"})

    ffmpeg.run(job, overwrite_output=True)
    return output_video

# Build the Gradio Blocks user interface
with gr.Blocks() as blocks_app:
    gr.Markdown(
    """
    # Video Subtitle Creation API
    For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/VideoSubtitleCreation)
    """)
    # Input row: the video, the language and model selectors, and both action buttons
    with gr.Row():
        video_file = gr.Video(label="Upload Video")
        source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language", value="en")
        target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language", value="en")
        model_size_dropdown = gr.Dropdown(choices=model_size_options, label="Model Size", value="large")  # Model size dropdown
        transcribe_button = gr.Button("Transcribe Video")
        translate_button = gr.Button("Translate Subtitles")
    
    # Outputs shared by both buttons: the subtitled video and the SRT file
    output_video = gr.Video(label="Processed Video")
    output_srt = gr.File(label="Subtitles File (.srt)")

    # Handler for "Transcribe Video": transcribe, write the SRT file, then
    # render the subtitles onto the video (soft_subtitle=False).
    # Returns (video path, SRT path) to match the click outputs below.
    def transcribe_and_add_subtitles(video_file, model_size):
        transcription = transcribe(video_file, model_size)
        srt_path = text_to_srt(transcription)
        output_video_path = add_subtitle_to_video(video_file, srt_path, subtitle_language="eng", soft_subtitle=False)
        return output_video_path, srt_path
    
    # Handler for "Translate Subtitles": runs its own transcription, translates
    # the resulting SRT file, then renders the translated subtitles onto the video.
    # Returns (video path, translated SRT path).
    def translate_subtitles_and_add_to_video(video_file, source_language_code, target_language_code, model_size):
        transcription = transcribe(video_file, model_size)
        srt_path = text_to_srt(transcription)
        translated_srt_path = translate_srt(srt_path, source_language_code, target_language_code)
        output_video_path = add_subtitle_to_video(video_file, translated_srt_path, target_language_code, soft_subtitle=False)
        return output_video_path, translated_srt_path
    
    # Wire each button to its handler; the order of inputs matches the handler parameters
    transcribe_button.click(transcribe_and_add_subtitles, inputs=[video_file, model_size_dropdown], outputs=[output_video, output_srt])
    translate_button.click(translate_subtitles_and_add_to_video, inputs=[video_file, source_language_dropdown, target_language_dropdown, model_size_dropdown], outputs=[output_video, output_srt])

# Launch the application
blocks_app.launch()