santhosh committed on
Commit
5c57059
1 Parent(s): f23cd83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -6
app.py CHANGED
@@ -3,13 +3,23 @@ import gradio as gr
3
  from huggingface_hub import snapshot_download
4
  from sentencepiece import SentencePieceProcessor
5
 
6
- model_name="santhosh/madlad400-3b-ct2"
 
 
 
 
 
7
 
8
- tokenizer = SentencePieceProcessor()
 
9
  model_path = snapshot_download(model_name)
10
- print(model_path)
 
11
  tokenizer.load(f"{model_path}/sentencepiece.model")
12
  translator = ctranslate2.Translator(model_path)
 
 
 
13
 
14
  def translate(input_text, target_language):
15
  input_tokens = tokenizer.encode(f"<2{target_language}> {input_text}", out_type=str)
@@ -24,12 +34,23 @@ def translate(input_text, target_language):
24
  translated_sentence = tokenizer.decode(results[0].hypotheses[0])
25
  return translated_sentence
26
 
 
27
  def translate_interface(input_text, target_language):
28
  translated_text = translate(input_text, target_language)
29
  return translated_text
30
 
31
- input_text = gr.Textbox(label="Input Text", value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge. ")
32
- target_language = gr.Textbox(value="ml", label="Target Language")
 
 
 
 
33
  output_text = gr.Textbox(label="Translated Text")
34
 
35
- gr.Interface(fn=translate_interface, inputs=[input_text, target_language], outputs=output_text).launch()
 
 
 
 
 
 
 
3
  from huggingface_hub import snapshot_download
4
  from sentencepiece import SentencePieceProcessor
5
 
6
+ title = "MADLAD-400 Translation Demo"
7
+ description = """
8
+ <p>
9
+ Translator using <a href='https://arxiv.org/abs/2309.04662' target='_blank'>MADLAD-400</a>, a multilingual machine translation model on 250 billion tokens covering over 450 languages using publicly available data. This demo application uses <a href="https://huggingface.co/santhosh/madlad400-3b-ct2">santhosh/madlad400-3b-ct2</a> model, which is a ctranslate2 optimized model of <a href="https://huggingface.co/google/madlad400-3b-mt">google/madlad400-3b-mt</a>
10
+ </p>
11
+ """
12
 
13
+
14
+ model_name = "santhosh/madlad400-3b-ct2"
15
  model_path = snapshot_download(model_name)
16
+
17
+ tokenizer = SentencePieceProcessor()
18
  tokenizer.load(f"{model_path}/sentencepiece.model")
19
  translator = ctranslate2.Translator(model_path)
20
+ tokens = [tokenizer.decode(i) for i in range(460)]
21
+ lang_codes = [token[2:-1] for token in tokens if token.startswith("<2")]
22
+
23
 
24
  def translate(input_text, target_language):
25
  input_tokens = tokenizer.encode(f"<2{target_language}> {input_text}", out_type=str)
 
34
  translated_sentence = tokenizer.decode(results[0].hypotheses[0])
35
  return translated_sentence
36
 
37
+
38
  def translate_interface(input_text, target_language):
39
  translated_text = translate(input_text, target_language)
40
  return translated_text
41
 
42
+
43
+ input_text = gr.Textbox(
44
+ label="Input Text",
45
+ value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
46
+ )
47
+ target_language = gr.Dropdown(lang_codes, value="en", label="Target Language")
48
  output_text = gr.Textbox(label="Translated Text")
49
 
50
+ gr.Interface(
51
+ title=title,
52
+ description=description,
53
+ fn=translate_interface,
54
+ inputs=[input_text, target_language],
55
+ outputs=output_text,
56
+ ).launch()