zionia's picture
add logo
e7da02f verified
raw
history blame
3.13 kB
import gradio as gr
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
# Hugging Face Hub identifier of the checkpoint loaded below.
model_name = "dsfsi/nso-en-m2m100-gov"
# Load the tokenizer and the conditional-generation model from the same
# checkpoint; both are module-level so translate() can reuse them per request.
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
# Startup diagnostic: dump the tokenizer's language-code -> token mapping.
print(tokenizer.lang_code_to_token)
# Source-language code used when encoding input ("ns"; presumably Northern
# Sotho, matching the page title — confirm against the mapping printed above).
tokenizer.src_lang = "ns"
# Make generation start with the tokenizer's language id for "en" by default.
model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
def translate(inp):
    """Translate Northern Sotho text to English with the module-level model.

    Args:
        inp: Text to translate. ``None``, an empty string, or a
            whitespace-only string short-circuits to ``""`` without
            invoking the tokenizer or the model.

    Returns:
        The decoded translation with special tokens stripped, or ``""`` when
        there is nothing to translate.
    """
    # Nothing to translate: avoid running generation on a blank prompt.
    if inp is None or (isinstance(inp, str) and not inp.strip()):
        return ""

    # Same cap is applied to the encoded input and to the generated output.
    max_tokens = 512

    # Truncate over-long input instead of letting it overflow the model's
    # position embeddings (NOTE(review): 512 mirrors the generation cap;
    # confirm against the checkpoint's max_position_embeddings).
    inputs = tokenizer(
        inp,
        return_tensors="pt",
        truncation=True,
        max_length=max_tokens,
    )
    translated_tokens = model.generate(
        **inputs,
        max_length=max_tokens,
        forced_bos_token_id=tokenizer.get_lang_id("en"),
    )
    # Only the first sequence of the generated batch is decoded.
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
# ---------------------------------------------------------------------------
# Static page content.
# All strings are HTML fragments; they are defined before the UI is built so
# every one of them can be rendered inside the same Blocks layout.
# ---------------------------------------------------------------------------
page_title = "Northern Sotho to English Translation"

description = """
<p style='text-align: center;'>
Northern Sotho to English Translation
</p>
<p>
This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.
</p>
"""
article = """
<div style='text-align: center;'>
<a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
<a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
<a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
</div>
<br/>
<p style='text-align: center;'>
<h2>Translate | Northern Sotho to English (dsfsi/nso-en-m2m100-gov)</h2>
</p>
"""
extra_info = """
<div style='text-align: center;'>
<h4>More information about the space</h4>
</div>
<p>
This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.
</p>
"""
authors = """
<div style='text-align: center;'>
Authors: Vukosi Marivate, Matimba Shingange, Richard Lastrucci, Isheanesu Joseph Dzingirai, Jenalea Rajab
</div>
"""
# Plain string (not an f-string / str.format template), so braces are written
# once; doubled braces would be shown literally on the page.
citation = """
<div style='text-align: center;'>
<p>
@inproceedings{dsfsi2024, title={Northern Sotho to English Translation using M2M100},
author={DSFSI Research Team}, year={2024},
url={https://huggingface.co/dsfsi/nso-en-m2m100-gov}
}
</p>
</div>
"""
doi = """
<div style='text-align: center;'>
DOI: <a href="https://doi.org/10.1234/dsfsi.2024.001" target="_blank">10.1234/dsfsi.2024.001</a>
</div>
"""

# ---------------------------------------------------------------------------
# UI layout.
# Everything that should appear on the page is created inside one Blocks
# context; components created outside of it (or after launch) are not shown.
# ---------------------------------------------------------------------------
with gr.Blocks(title=page_title) as iface:
    # Three equal columns with the DSFSI logo in the middle one, which
    # centres it horizontally. The two outer columns are intentionally empty.
    with gr.Row():
        gr.Column()
        with gr.Column():
            gr.Image(
                "logo_transparent_small.png",
                elem_id="logo",
                show_label=False,
            )
        gr.Column()

    gr.HTML(f"<h1 style='text-align: center;'>{page_title}</h1>")
    gr.HTML(description)

    # Translation widget: input on the left, result on the right.
    with gr.Row():
        with gr.Column():
            source_box = gr.Textbox(
                lines=5,
                placeholder="Enter Northern Sotho text (maximum 5 lines)",
                label="Input",
            )
            translate_btn = gr.Button("Translate", variant="primary")
        with gr.Column():
            result_box = gr.Textbox(label="Output")

    translate_btn.click(fn=translate, inputs=source_box, outputs=result_box)

    # Footer content, rendered as raw HTML in page order.
    gr.HTML(article)
    gr.HTML(extra_info)
    gr.HTML(authors)
    gr.HTML(citation)
    gr.HTML(doi)

# Queueing is enabled on the app itself rather than through a launch()
# keyword; launch() comes last because it starts serving the finished layout.
iface.queue()
iface.launch()