import ctranslate2
import gradio as gr
from huggingface_hub import snapshot_download
from sentencepiece import SentencePieceProcessor

# Download the CTranslate2 conversion of MADLAD-400 3B and load its SentencePiece tokenizer.
model_name = "santhosh/madlad400-3b-ct2"
tokenizer = SentencePieceProcessor()
model_path = snapshot_download(model_name)
print(model_path)
tokenizer.load(f"{model_path}/sentencepiece.model")
translator = ctranslate2.Translator(model_path)
def translate(input_text, target_language):
    # MADLAD-400 expects the target language as a "<2xx>" prefix token in front of the source text.
    input_tokens = tokenizer.encode(f"<2{target_language}> {input_text}", out_type=str)
    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        # max_batch_size=1024,
        beam_size=1,
        no_repeat_ngram_size=1,
        # repetition_penalty=2,
    )
    translated_sentence = tokenizer.decode(results[0].hypotheses[0])
    return translated_sentence
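# Example (not part of the original Space): translate() can also be called directly,
# e.g. translate("Hello, world", "es") should return a Spanish translation.
# The target language is a MADLAD-400 language code such as "ml", "es" or "fr".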
def translate_interface(input_text, target_language):
    translated_text = translate(input_text, target_language)
    return translated_text

# Gradio UI: free-text input, a target-language code, and the translated output.
input_text = gr.Textbox(label="Input Text", value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.")
target_language = gr.Textbox(value="ml", label="Target Language")
output_text = gr.Textbox(label="Translated Text")

gr.Interface(fn=translate_interface, inputs=[input_text, target_language], outputs=output_text).launch()
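# To run locally (assumption: dependencies installed with
# `pip install ctranslate2 gradio huggingface_hub sentencepiece`):
#   python app.py
# launch() serves the demo on a local URL; pass share=True for a temporary public link.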