Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ctranslate2
|
2 |
+
import gradio as gr
|
3 |
+
from huggingface_hub import snapshot_download
|
4 |
+
from sentencepiece import SentencePieceProcessor
|
5 |
+
|
6 |
+
# Hub repository id passed to snapshot_download below.
model_name = "santhosh/madlad400-3b-ct2"

# Resolve the repository to a local directory; the path is printed so it
# shows up in the application log.
model_path = snapshot_download(model_name)
print(model_path)

# The SentencePiece model file is read from inside the downloaded snapshot.
tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/sentencepiece.model")

# The translator is loaded from the same snapshot directory.
translator = ctranslate2.Translator(model_path)
|
13 |
+
|
14 |
+
def translate(input_text, target_language):
    """Translate ``input_text`` into ``target_language``.

    The target language is communicated to the model by prefixing the text
    with a ``<2{target_language}>`` tag before tokenization.

    Args:
        input_text: Text to translate.
        target_language: Language code placed inside the ``<2...>`` tag,
            e.g. ``"ml"``. Surrounding whitespace is ignored.

    Returns:
        The decoded first hypothesis for the input, or an empty string when
        ``input_text`` is empty, ``None`` or whitespace-only.
    """
    # Nothing to translate: skip the model instead of decoding from a bare tag.
    if not input_text or not input_text.strip():
        return ""

    # Whitespace around the code would otherwise end up inside the tag.
    language_code = (target_language or "").strip()

    input_tokens = tokenizer.encode(
        f"<2{language_code}> {input_text}", out_type=str
    )
    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        beam_size=1,
        # NOTE(review): a size of 1 appears to forbid any token from being
        # emitted twice in the output - confirm this is intended for long texts.
        no_repeat_ngram_size=1,
    )
    return tokenizer.decode(results[0].hypotheses[0])
|
26 |
+
|
27 |
+
def translate_interface(input_text, target_language):
    """Gradio callback: forward both field values to ``translate``.

    Args:
        input_text: Text to translate.
        target_language: Target language code.

    Returns:
        Whatever ``translate`` returns for the given arguments.
    """
    return translate(input_text, target_language)
|
30 |
+
|
31 |
+
# Input components: the text to translate and the target language code,
# each pre-filled with a default value.
input_text = gr.Textbox(
    label="Input Text",
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge. ",
)
target_language = gr.Textbox(label="Target Language", value="ml")

# Output component that receives the callback's return value.
output_text = gr.Textbox(label="Translated Text")

# Wire the components to the callback and start the app.
demo = gr.Interface(
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
)
demo.launch()
|