Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
# Hugging Face Hub checkpoint shared by the tokenizer and the model.
model_name = "dsfsi/nso-en-m2m100-gov"

tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

# Print the tokenizer's language-code -> token table at startup.
print(tokenizer.lang_code_to_token)

# Source side is tagged "ns"; generation is configured to begin with the
# "en" language token so the decoder produces English.
tokenizer.src_lang = "ns"
english_token_id = tokenizer.get_lang_id("en")
model.config.forced_bos_token_id = english_token_id
def translate(inp):
    """Translate text to English with the module-level M2M100 tokenizer/model.

    Args:
        inp: Source text to translate. ``None``, empty, and whitespace-only
            values are accepted and treated as "nothing to translate".

    Returns:
        The decoded translation with special tokens stripped, or ``""`` when
        there is no input text.
    """
    # An empty textbox would otherwise be sent to the model, which still
    # generates (meaningless) output; short-circuit instead.
    if not inp or not inp.strip():
        return ""

    inputs = tokenizer(inp, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        max_length=512,
        # Force the first generated token to be the English language token.
        forced_bos_token_id=tokenizer.get_lang_id("en"),
    )
    # generate() returns a batch; only one sequence was submitted.
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
# --- Static HTML fragments shown around the translation widget -------------

description = """
<p style='text-align: center;'>
Northern Sotho to English Translation
</p>
<p>
This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.
</p>
"""

article = """
<div style='text-align: center;'>
<a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
<a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
<a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
</div>
<br/>
<p style='text-align: center;'>
<h2>Translate | Northern Sotho to English (dsfsi/nso-en-m2m100-gov)</h2>
</p>
"""

extra_info = """
<div style='text-align: center;'>
<h4>More information about the space</h4>
</div>
<p>
This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.
</p>
"""

authors = """
<div style='text-align: center;'>
Authors: Vukosi Marivate, Matimba Shingange, Richard Lastrucci, Isheanesu Joseph Dzingirai, Jenalea Rajab
</div>
"""

# Plain (non-f) string: braces are written once so the BibTeX entry is
# displayed with single braces rather than literal "{{ }}".
citation = """
<div style='text-align: center;'>
<p>
@inproceedings{dsfsi2024, title={Northern Sotho to English Translation using M2M100},
author={DSFSI Research Team}, year={2024},
url={https://huggingface.co/dsfsi/nso-en-m2m100-gov}
}
</p>
</div>
"""

doi = """
<div style='text-align: center;'>
DOI: <a href="https://doi.org/10.1234/dsfsi.2024.001" target="_blank">10.1234/dsfsi.2024.001</a>
</div>
"""

# --- Page layout -----------------------------------------------------------
# Everything is declared inside one gr.Blocks context: Gradio has no
# gr.Columns()/gr.markdown() helpers, and components only become part of the
# page when they are created inside a Blocks context before launch().
with gr.Blocks(title="Northern Sotho to English Translation") as iface:
    # Centre the logo by putting it in the middle of three columns.
    with gr.Row():
        gr.Column()
        with gr.Column():
            gr.Image(
                "logo_transparent_small.png",
                elem_id="logo",
                show_label=False,
            )
        gr.Column()

    gr.HTML("<h1 style='text-align: center;'>Northern Sotho to English Translation</h1>")
    gr.HTML(description)

    # Input on the left, translation on the right.
    with gr.Row():
        with gr.Column():
            source_box = gr.Textbox(
                lines=5,
                placeholder="Enter Northern Sotho text (maximum 5 lines)",
                label="Input",
            )
            translate_button = gr.Button("Translate")
        with gr.Column():
            output_box = gr.Textbox(label="Output")

    translate_button.click(fn=translate, inputs=source_box, outputs=output_box)

    gr.HTML(article)
    gr.HTML(extra_info)
    gr.HTML(authors)
    gr.HTML(citation)
    gr.HTML(doi)

# Request queuing is enabled through queue(); launch() is called last because
# it starts the server and nothing can be added to the page afterwards.
iface.queue()
iface.launch()