File size: 1,358 Bytes
7af1f35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import ctranslate2
import gradio as gr
from huggingface_hub import snapshot_download
from sentencepiece import SentencePieceProcessor

# Hugging Face Hub repository holding the CTranslate2 model files.
model_name = "santhosh/madlad400-3b-ct2"

# Fetch (or reuse a cached copy of) the repository snapshot and log where it
# landed on disk.
model_path = snapshot_download(model_name)
print(model_path)

# The SentencePiece vocabulary is read from the same snapshot directory that
# the translator is loaded from.
tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/sentencepiece.model")

translator = ctranslate2.Translator(model_path)

def translate(input_text, target_language):
    """Translate ``input_text`` into ``target_language``.

    The text is prefixed with a ``<2xx>`` target-language tag, tokenized with
    the module-level SentencePiece ``tokenizer``, run through the module-level
    CTranslate2 ``translator`` and detokenized back into a string.

    Args:
        input_text: Text to translate.
        target_language: Language code placed inside the ``<2...>`` tag
            (the UI defaults to ``"ml"``). Surrounding whitespace is ignored.

    Returns:
        The first hypothesis produced for the input, decoded to a string, or
        ``""`` when ``input_text`` is ``None``, empty or whitespace-only.
    """
    # Nothing to translate: skip the model call instead of letting it decode
    # from a bare language tag.
    if input_text is None or not input_text.strip():
        return ""

    # Stray whitespace would end up inside the tag (e.g. "<2 ml >"), which
    # presumably no longer matches the model's language token.
    language_code = (target_language or "").strip()

    input_tokens = tokenizer.encode(
        f"<2{language_code}> {input_text}", out_type=str
    )
    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        # A beam size of 1 is greedy search.
        beam_size=1,
        # NOTE(review): an n-gram size of 1 forbids any token from appearing
        # twice in the output -- confirm this is intended.
        no_repeat_ngram_size=1,
    )
    # A single input was submitted, so only the first result is relevant;
    # take its first hypothesis.
    return tokenizer.decode(results[0].hypotheses[0])

def translate_interface(input_text, target_language):
    """Gradio callback: return ``translate(input_text, target_language)``."""
    return translate(input_text, target_language)

# UI components: two input text boxes (source text and target language code)
# and one output text box for the translation.
input_text = gr.Textbox(
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge. ",
    label="Input Text",
)
target_language = gr.Textbox(label="Target Language", value="ml")
output_text = gr.Textbox(label="Translated Text")

# Wire the components to the translation callback and start the app.
demo = gr.Interface(
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
)
demo.launch()