zionia committed on
Commit
263fe73
1 Parent(s): dbeaa8d

update for latest gradio version

Browse files
Files changed (1) hide show
  1. app.py +53 -78
app.py CHANGED
@@ -5,8 +5,6 @@ model_name = "dsfsi/nso-en-m2m100-gov"
5
  tokenizer = M2M100Tokenizer.from_pretrained(model_name)
6
  model = M2M100ForConditionalGeneration.from_pretrained(model_name)
7
 
8
- print(tokenizer.lang_code_to_token)
9
-
10
  tokenizer.src_lang = "ns"
11
  model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
12
 
@@ -16,86 +14,63 @@ def translate(inp):
16
  translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
17
  return translated_text
18
 
19
-
20
- logo = """
21
- <div style='text-align: center;'>
22
- <img src='file/logo_transparent_small.png' alt='Logo' width='150'/>
23
- </div>
24
- """
25
-
26
- description = """
27
- <p style='text-align: center;'>
28
- Northern Sotho to English Translation
29
- </p>
30
- <p>
31
- This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.
32
- </p>
33
- """
34
-
35
- article = """
36
- <div style='text-align: center;'>
37
- <a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
38
- <a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
39
- <a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
40
- </div>
41
- <br/>
42
- """
43
-
44
- authors = """
45
- <div style='text-align: center;'>
46
- Authors: Vukosi Marivate, Matimba Shingange, Richard Lastrucci,
47
- Isheanesu Joseph Dzingirai, Jenalea Rajab
48
- </div>
49
- """
50
-
51
- citation = """
52
- <pre style="text-align: left; white-space: pre-wrap;">
53
- @inproceedings{lastrucci-etal-2023-preparing,
54
- title = "Preparing the Vuk{'}uzenzele and {ZA}-gov-multilingual {S}outh {A}frican multilingual corpora",
55
- author = "Richard Lastrucci and Isheanesu Dzingirai and Jenalea Rajab
56
- and Andani Madodonga and Matimba Shingange and Daniel Njini and Vukosi Marivate",
57
- booktitle = "Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)",
58
- month = may,
59
- year = "2023",
60
- address = "Dubrovnik, Croatia",
61
- publisher = "Association for Computational Linguistics",
62
- url = "https://aclanthology.org/2023.rail-1.3",
63
- pages = "18--25"
64
- }
65
- </pre>
66
- """
67
-
68
- doi = """
69
- <div style='text-align: center;'>
70
- DOI: <a href="https://doi.org/10.48550/arXiv.2303.03750" target="_blank">10.48550/arXiv.2303.03750</a>
71
- </div>
72
- """
73
-
74
  with gr.Blocks() as demo:
75
- gr.Markdown(logo)
76
- gr.Markdown(description)
77
- gr.Markdown(article)
78
-
79
- with gr.Interface(
80
- fn=translate,
81
- title="Northern Sotho to English Translation",
82
- description=description,
83
- article=article,
84
- inputs=gr.components.Textbox(lines=5, placeholder="Enter Northern Sotho text (maximum 5 lines)", label="Input"),
85
- outputs="text"
86
- ) as iface:
87
- iface.launch()
88
-
 
 
 
 
 
 
 
 
 
 
 
89
  with gr.Accordion("More Information", open=False):
90
  gr.Markdown("""
91
  <h4 style="text-align: center;">More information about the space</h4>
92
- <p>This is a variant of the M2M100 model, fine-tuned on a multilingual dataset
93
- to support translation from Northern Sotho (Sepedi) to English. The model was trained
94
- with a focus on improving translation accuracy for low-resource languages.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  """)
96
- gr.Markdown(authors)
97
- gr.Markdown(citation)
98
- gr.Markdown(doi)
99
 
100
  demo.launch()
101
-
 
5
  tokenizer = M2M100Tokenizer.from_pretrained(model_name)
6
  model = M2M100ForConditionalGeneration.from_pretrained(model_name)
7
 
 
 
8
  tokenizer.src_lang = "ns"
9
  model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
10
 
 
14
  translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
15
  return translated_text
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  with gr.Blocks() as demo:
18
+ gr.Markdown("""
19
+ <div style='text-align: center;'>
20
+ <img src='file/logo_transparent_small.png' alt='Logo' width='150'/>
21
+ </div>
22
+ """)
23
+
24
+ gr.Markdown("""
25
+ <h1 style='text-align: center;'>Northern Sotho to English Translation</h1>
26
+ <p style='text-align: center;'>This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.</p>
27
+ """)
28
+
29
+ inp_text = gr.Textbox(lines=5, placeholder="Enter Northern Sotho text (maximum 5 lines)", label="Input")
30
+ output_text = gr.Textbox(label="Output")
31
+ translate_button = gr.Button("Translate")
32
+ translate_button.click(translate, inputs=inp_text, outputs=output_text)
33
+
34
+ gr.Markdown("""
35
+ <div style='text-align: center;'>
36
+ <a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
37
+ <a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
38
+ <a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
39
+ </div>
40
+ <br/>
41
+ """)
42
+
43
  with gr.Accordion("More Information", open=False):
44
  gr.Markdown("""
45
  <h4 style="text-align: center;">More information about the space</h4>
46
+ <p>This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.</p>
47
+ """)
48
+ gr.Markdown("""
49
+ <div style='text-align: center;'>
50
+ Authors: Vukosi Marivate, Matimba Shingange, Richard Lastrucci,
51
+ Isheanesu Joseph Dzingirai, Jenalea Rajab
52
+ </div>
53
+ """)
54
+ gr.Markdown("""
55
+ <pre style="text-align: left; white-space: pre-wrap;">
56
+ @inproceedings{lastrucci-etal-2023-preparing,
57
+ title = "Preparing the Vuk{'}uzenzele and {ZA}-gov-multilingual {S}outh {A}frican multilingual corpora",
58
+ author = "Richard Lastrucci and Isheanesu Dzingirai and Jenalea Rajab
59
+ and Andani Madodonga and Matimba Shingange and Daniel Njini and Vukosi Marivate",
60
+ booktitle = "Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)",
61
+ month = may,
62
+ year = "2023",
63
+ address = "Dubrovnik, Croatia",
64
+ publisher = "Association for Computational Linguistics",
65
+ url = "https://aclanthology.org/2023.rail-1.3",
66
+ pages = "18--25"
67
+ }
68
+ </pre>
69
+ """)
70
+ gr.Markdown("""
71
+ <div style='text-align: center;'>
72
+ DOI: <a href="https://doi.org/10.48550/arXiv.2303.03750" target="_blank">10.48550/arXiv.2303.03750</a>
73
+ </div>
74
  """)
 
 
 
75
 
76
  demo.launch()