anzorq committed on
Commit
a4d0b27
1 Parent(s): a15bcba
Files changed (1) hide show
  1. app.py +42 -19
app.py CHANGED
@@ -1,27 +1,50 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
6
- src_lang="ru"
7
- tgt_lang="zu"
8
 
9
- tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
10
- model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
11
 
12
- def translate(text):
13
- inputs = tokenizer(text, return_tensors="pt")
14
 
15
- translated_tokens = model.generate(
16
- **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=4, num_return_sequences=4
17
- )
18
 
19
- translations = []
20
- for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
21
- translations.append(translation)
22
 
23
- return translations
 
 
 
24
 
25
- output = gr.outputs.Textbox()
26
- iface = gr.Interface(fn=translate, inputs="text", outputs=output)
27
- iface.launch()
 
1
+ # import gradio as gr
2
+
3
+ # from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+
5
+ # model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
6
+ # src_lang="ru"
7
+ # tgt_lang="zu"
8
+
9
+ # tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
10
+ # model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
11
+
12
+ # def translate(text):
13
+ # inputs = tokenizer(text, return_tensors="pt")
14
 
15
+ # translated_tokens = model.generate(
16
+ # **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=4, num_return_sequences=4
17
+ # )
18
+
19
+ # translations = []
20
+ # for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
21
+ # translations.append(translation)
22
+
23
+ # return translations
24
+
25
+ # output = gr.outputs.Textbox()
26
+ # iface = gr.Interface(fn=translate, inputs="text", outputs=output)
27
+ # iface.launch()
28
+
29
+ import gradio as gr
30
 
31
+ title = "Русско-черкесский переводчик"
32
+ description = """
33
+ Demo of a Russian-Circassian (Kabardian dialect) translator.
34
 
35
+ The translator is based on a machine learning model that has been trained on 45,000 Russian-Circassian sentence pairs.
 
36
 
37
+ It is based on Facebook's <a href="https://about.fb.com/news/2020/10/first-multilingual-machine-translation-model/">M2M-100 model</a>, and can also translate from 100 other languages to Circassian (English, French, Spanish, etc.), but less accurately.
 
38
 
39
+ The data corpus is constantly being expanded, and we need help in finding sentence sources, OCR, data cleaning, etc.
 
 
40
 
41
+ If you are interested in helping out with this project, please contact me at the link below.
42
+ """
43
+ article = """<p style='text-align: center'><a href='https://arxiv.org/abs/1806.00187'>Scaling Neural Machine Translation</a> | <a href='https://github.com/pytorch/fairseq/'>Github Repo</a></p>"""
44
 
45
+ examples = [
46
+ ["Hello world!"],
47
+ ["PyTorch Hub is a pre-trained model repository designed to facilitate research reproducibility."]
48
+ ]
49
 
50
+ gr.Interface.load("models/anzorq/m2m100_418M_ft_ru-kbd_44K", title=title, description=description, article=article, examples=examples).launch()