"""Gradio demo that translates text with a CTranslate2-converted Mesolitica T5 model."""

import ctranslate2
import gradio as gr
from huggingface_hub import snapshot_download
from sentencepiece import SentencePieceProcessor

title = "Mesolitica t5-base-standard-bahasa Translation Demo"
description = """
Translator using Mesolitica Malaysian Translation model. This demo application uses CTranslate2 optimized version of it: santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2,
"""

# Fetch the converted model snapshot from the Hugging Face Hub; the returned
# local directory holds both the CTranslate2 model and the SentencePiece model.
model_name = "santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2"
model_path = snapshot_download(model_name)

tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/sentencepiece.model")
translator = ctranslate2.Translator(model_path)

# Language code (as used by the dropdown) -> language name inserted into the
# "terjemah ke <name>:" instruction prefix of the prompt.
map_lang = {
    "en": "Inggeris",
    "jv": "Jawa",
    "bjn": "Banjarese",
    "ms": "Melayu",
    "id": "Indonesia",
}


def translate(input_text: str, target_language: str) -> str:
    """Translate *input_text* into the language identified by *target_language*.

    Args:
        input_text: Text to translate.
        target_language: A key of ``map_lang`` (e.g. ``"en"``, ``"ms"``).

    Returns:
        The decoded first hypothesis produced by the translator, or an empty
        string when *input_text* is empty or whitespace-only.

    Raises:
        KeyError: If *target_language* is not a key of ``map_lang``.
    """
    # Nothing to translate: avoid sending a bare instruction prefix to the model.
    if not input_text or not input_text.strip():
        return ""

    # The prompt previously began with a stray "f'" (a leftover f-string prefix
    # inside the literal), which was fed to the model as part of the input.
    # NOTE(review): "terjemah ke <language>: " is presumably the instruction
    # format the model was trained on -- confirm against the model card.
    prompt = f"terjemah ke {map_lang[target_language]}: {input_text}"
    input_tokens = tokenizer.encode(prompt, out_type=str)

    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        max_input_length=6144,
        max_decoding_length=6144,
        max_batch_size=1024,
        beam_size=1,
    )

    # Single input, so take the first result and its first hypothesis.
    return tokenizer.decode(results[0].hypotheses[0])


def translate_interface(input_text: str, target_language: str) -> str:
    """Gradio callback: delegate to :func:`translate` and return its result."""
    return translate(input_text, target_language)


input_text = gr.Textbox(
    label="Input Text",
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
)

# (display label, value) pairs for the dropdown; each value is a key of map_lang.
languages = [
    ("English", "en"),
    ("Bahasa Melayu", "ms"),
    ("Indonesian", "id"),
    ("Banjarese", "bjn"),
    ("Jawa", "jv"),
]
target_language = gr.Dropdown(languages, value="en", label="Target Language")

output_text = gr.Textbox(label="Translated Text")

gr.Interface(
    title=title,
    description=description,
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
).launch()