Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,13 +3,23 @@ import gradio as gr
|
|
3 |
from huggingface_hub import snapshot_download
|
4 |
from sentencepiece import SentencePieceProcessor
|
5 |
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
|
|
9 |
model_path = snapshot_download(model_name)
|
10 |
-
|
|
|
11 |
tokenizer.load(f"{model_path}/sentencepiece.model")
|
12 |
translator = ctranslate2.Translator(model_path)
|
|
|
|
|
|
|
13 |
|
14 |
def translate(input_text, target_language):
|
15 |
input_tokens = tokenizer.encode(f"<2{target_language}> {input_text}", out_type=str)
|
@@ -24,12 +34,23 @@ def translate(input_text, target_language):
|
|
24 |
translated_sentence = tokenizer.decode(results[0].hypotheses[0])
|
25 |
return translated_sentence
|
26 |
|
|
|
27 |
def translate_interface(input_text, target_language):
    """Gradio callback: forward both arguments to ``translate`` and return its result."""
    return translate(input_text, target_language)
|
30 |
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
33 |
output_text = gr.Textbox(label="Translated Text")
|
34 |
|
35 |
-
gr.Interface(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from huggingface_hub import snapshot_download
|
4 |
from sentencepiece import SentencePieceProcessor
|
5 |
|
6 |
+
# Page heading and HTML blurb; both are handed to gr.Interface further down
# as its `title` and `description` arguments.
title = "MADLAD-400 Translation Demo"
description = """
<p>
Translator using <a href='https://arxiv.org/abs/2309.04662' target='_blank'>MADLAD-400</a>, a multilingual machine translation model trained on 250 billion tokens covering over 450 languages using publicly available data. This demo application uses the <a href="https://huggingface.co/santhosh/madlad400-3b-ct2">santhosh/madlad400-3b-ct2</a> model, which is a ctranslate2 optimized model of <a href="https://huggingface.co/google/madlad400-3b-mt">google/madlad400-3b-mt</a>
</p>
"""
|
12 |
|
13 |
+
|
14 |
+
# Fetch the model snapshot from the Hugging Face Hub; the returned local
# directory is used for both the tokenizer file and the translator.
model_name = "santhosh/madlad400-3b-ct2"
model_path = snapshot_download(model_name)

# SentencePiece tokenizer loaded from the snapshot directory.
tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/sentencepiece.model")

# CTranslate2 translator built from the same snapshot directory.
translator = ctranslate2.Translator(model_path)

# Decode ids 0..459 and keep the ones that look like "<2xx>" target-language
# tags, stripping the leading "<2" and the final character to get the code.
# NOTE(review): presumably all language-tag tokens sit in the first 460 ids
# of this vocabulary -- confirm against the model's vocab.
tokens = list(map(tokenizer.decode, range(460)))
lang_codes = [piece[2:-1] for piece in tokens if piece.startswith("<2")]
|
22 |
+
|
23 |
|
24 |
def translate(input_text, target_language):
|
25 |
input_tokens = tokenizer.encode(f"<2{target_language}> {input_text}", out_type=str)
|
|
|
34 |
translated_sentence = tokenizer.decode(results[0].hypotheses[0])
|
35 |
return translated_sentence
|
36 |
|
37 |
+
|
38 |
def translate_interface(input_text, target_language):
    """Gradio callback: forward both arguments to ``translate`` and return its result."""
    return translate(input_text, target_language)
|
41 |
|
42 |
+
|
43 |
+
# Input widgets: a text box pre-filled with a sample sentence, and a dropdown
# of the language codes collected in `lang_codes` (defaulting to "en").
input_text = gr.Textbox(
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
    label="Input Text",
)
target_language = gr.Dropdown(lang_codes, label="Target Language", value="en")

# Output widget that receives the string returned by translate_interface.
output_text = gr.Textbox(label="Translated Text")

# Wire the callback to the widgets and start the app.
gr.Interface(
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
    title=title,
    description=description,
).launch()
|