import gradio as gr from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer model_name = "dsfsi/nso-en-m2m100-gov" tokenizer = M2M100Tokenizer.from_pretrained(model_name) model = M2M100ForConditionalGeneration.from_pretrained(model_name) print(tokenizer.lang_code_to_token) tokenizer.src_lang = "ns" model.config.forced_bos_token_id = tokenizer.get_lang_id("en") def translate(inp): inputs = tokenizer(inp, return_tensors="pt") translated_tokens = model.generate(**inputs, max_length=512, forced_bos_token_id=tokenizer.get_lang_id("en")) translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True) return translated_text img1, img2, img3 = gr.Columns(3) with img2: gr.Image("logo_transparent_small.png", alt="DSFSI Logo", elem_id="logo", label=None) description = """
Northern Sotho to English Translation
This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.
""" article = """
This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.
""" with gr.Interface( fn=translate, title="Northern Sotho to English Translation", description=description, article=article, inputs=gr.components.Textbox(lines=5, placeholder="Enter Northern Sotho text (maximum 5 lines)", label="Input"), outputs="text" ) as iface: iface.launch(enable_queue=True) authors = """@inproceedings{{dsfsi2024, title={{Northern Sotho to English Translation using M2M100}}, author={{DSFSI Research Team}}, year={{2024}}, url={{https://huggingface.co/dsfsi/nso-en-m2m100-gov}} }}