Spaces:

abinashbordoloi
/

Anubaad-Assamese-Translation-Model_NLLB-200

Sleeping

App Files Files Community

abinashbordoloi committed on Dec 9, 2023

Commit

f89e277

•

1 Parent(s): 91eeaad

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -81

app.py CHANGED Viewed

@@ -1,90 +1,55 @@
-import os
-import torch
 import gradio as gr
-import time
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 from supported_languages import LANGS
-def load_models():
-    # build model and tokenizer
-    model_name_dict = {
-		  # 'nllb-distilled-600M': 'facebook/nllb-200-distilled-600M',
-                  # 'nllb-1.3B': 'facebook/nllb-200-1.3B',
-                  'nllb-distilled-1.3B': 'facebook/nllb-200-distilled-1.3B',
-                  # #'nllb-3.3B': 'facebook/nllb-200-3.3B',
-                  }
-    model_dict = {}
-    for call_name, real_name in model_name_dict.items():
-        print('\tLoading model: %s' % call_name)
-        model = AutoModelForSeq2SeqLM.from_pretrained(real_name)
-        tokenizer = AutoTokenizer.from_pretrained(real_name)
-        model_dict[call_name+'_model'] = model
-        model_dict[call_name+'_tokenizer'] = tokenizer
-    return model_dict
-def translation(source, target, text):
-    if len(model_dict) == 2:
-        model_name = 'nllb-distilled-1.3B'
-    start_time = time.time()
-    source = LANGS[source]
-    target = LANGS[target]
-    model = model_dict[model_name + '_model']
-    tokenizer = model_dict[model_name + '_tokenizer']
-    translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=source, tgt_lang=target)
-    output = translator(text, max_length=400)
-    end_time = time.time()
-    full_output = output
-    output = output[0]['translation_text']
-    result = {'inference_time': end_time - start_time,
-              'source': source,
-              'target': target,
-              'result': output,
-              'full_output': full_output}
-    return result
-if __name__ == '__main__':
-    print('\tinit models')
-    global model_dict
-    model_dict = load_models()
-    # define gradio demo
-    lang_codes = list(LANGS.keys())
-    #inputs = [gr.inputs.Radio(['nllb-distilled-600M', 'nllb-1.3B', 'nllb-distilled-1.3B'], label='NLLB Model'),
-    inputs = [gr.Dropdown(label='Source',choices = LANGS.keys()),
-              gr.Dropdown( label='Target',choices = LANGS.keys()),
-              gr.Textbox(label="Input text", lines=5,placeholder = "Enter text to translate"),
-              ]
-    outputs = gr.JSON()
-    title = "Anubaad-Assamese-Translation-Application-NLLB-200"
-    demo_status = "Demo is running on CPU"
-    description = f"Details: https://github.com/facebookresearch/fairseq/tree/nllb. {demo_status}"
-    examples = [
-    ['Chinese (Simplified)', 'English', '你吃饭了吗?']
-    ]
-    gr.Interface(translation,
-                 inputs,
-                 outputs,
-                 title=title,
-                 description=description,
-                 examples=examples,
-                 examples_per_page=50,
-                 ).launch()

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+import torch
+from ui import title, description, examples
 from supported_languages import LANGS
+#from langs_all import LANGS  ##for 200+ languages
+# Task name handed to transformers.pipeline() inside translate().
+TASK = "translation"
+# Checkpoint identifier loaded below; the commented-out line is an alternative
+# checkpoint kept for reference.
+CKPT = "facebook/nllb-200-distilled-1.3B"
+#CKPT = "facebook/nllb-200-distilled-600M"
+# Model and tokenizer are loaded once at import time and reused by every
+# translate() call through these module-level names.
+model = AutoModelForSeq2SeqLM.from_pretrained(CKPT)
+tokenizer = AutoTokenizer.from_pretrained(CKPT)
+# Explicit device selection is disabled; no `device` argument is passed to the
+# pipeline while this stays commented out.
+# device = 0 if torch.cuda.is_available() else -1
+def translate(text, src_lang, tgt_lang, max_length=512):
+    """
+    Translate ``text`` from the source language to the target language.
+
+    Args:
+        text: Text to translate.
+        src_lang: Source language, given either as a key of ``LANGS`` (the
+            entries offered in the UI dropdowns) or as a raw language code.
+        tgt_lang: Target language, same convention as ``src_lang``.
+        max_length: ``max_length`` value forwarded to the translation pipeline.
+
+    Returns:
+        The ``translation_text`` of the first pipeline result, or an empty
+        string when ``text`` is empty or contains only whitespace.
+    """
+    # Nothing to translate: skip the model call entirely.
+    if not text or not text.strip():
+        return ""
+    # The dropdowns are populated with the keys of LANGS, so map the selected
+    # key to its value before handing it to the pipeline. Anything that is not
+    # a key is passed through unchanged so callers supplying raw codes still work.
+    src_code = LANGS.get(src_lang, src_lang)
+    tgt_code = LANGS.get(tgt_lang, tgt_lang)
+    # The pipeline is built per call because the language pair is a
+    # construction argument; model and tokenizer are the module-level objects.
+    # To run on a specific device, additionally pass `device=...` here.
+    translation_pipeline = pipeline(TASK,
+                                    model=model,
+                                    tokenizer=tokenizer,
+                                    src_lang=src_code,
+                                    tgt_lang=tgt_code,
+                                    max_length=max_length)
+    result = translation_pipeline(text)
+    return result[0]['translation_text']
+# Build and launch the Gradio UI. The input components are passed to
+# translate() positionally, so their order must match its signature:
+# text, src_lang, tgt_lang, max_length.
+gr.Interface(
+    translate,
+    [
+        gr.Textbox(label="Text",placeholder ="Enter Your Text here"),
+        # Mappings expose `.keys()`, not `.key()`; the view is materialised as
+        # a list so the dropdown receives a plain sequence of choices.
+        gr.Dropdown(label="Source Language", choices=list(LANGS.keys())),
+        gr.Dropdown(label="Target Language", choices=list(LANGS.keys())),
+        gr.Slider(8, 512, value=512, step=8, label="Max Length")
+    ],
+    ["text"],
+    examples=examples,
+    # article=article,
+    cache_examples=False,
+    title=title,
+    description=description
+).launch()