anzorq committed on
Commit
52d96d8
1 Parent(s): c4cb87b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +26 -24
README.md CHANGED
@@ -92,31 +92,7 @@ pip install transformers sentencepiece torch ctranslate2
92
  ```
93
 
94
  ### 2. Inference
95
- ## Vanilla model
96
- ```Python
97
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
98
 
99
- model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
100
- tgt_lang="zu"
101
-
102
- tokenizer = AutoTokenizer.from_pretrained(model_path)
103
- model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
104
-
105
- def translate(text, num_beams=4, num_return_sequences=4):
106
- inputs = tokenizer(text, return_tensors="pt")
107
- num_return_sequences = min(num_return_sequences, num_beams)
108
-
109
- translated_tokens = model.generate(
110
- **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
111
- )
112
-
113
- translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]
114
- return text, translations
115
-
116
- # Test the translation
117
- text = "Текст для перевода"
118
- print(translate(text))
119
- ```
120
 
121
  ## CTranslate2 model (quantized model, much faster inference)
122
  First, download the files for the model in ctranslate2 format:
@@ -158,6 +134,32 @@ def translate(text, num_beams=4, num_return_sequences=4):
158
 
159
  return text, translations
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  # Test the translation
162
  text = "Текст для перевода"
163
  print(translate(text))
 
92
  ```
93
 
94
  ### 2. Inference
 
 
 
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  ## CTranslate2 model (quantized model, much faster inference)
98
  First, download the files for the model in ctranslate2 format:
 
134
 
135
  return text, translations
136
 
137
+ # Test the translation
138
+ text = "Текст для перевода"
139
+ print(translate(text))
140
+ ```
141
+
142
+ ## Vanilla model
143
+ ```Python
144
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
145
+
146
+ model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
147
+ tgt_lang="zu"
148
+
149
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
150
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
151
+
152
+ def translate(text, num_beams=4, num_return_sequences=4):
153
+ inputs = tokenizer(text, return_tensors="pt")
154
+ num_return_sequences = min(num_return_sequences, num_beams)
155
+
156
+ translated_tokens = model.generate(
157
+ **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
158
+ )
159
+
160
+ translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]
161
+ return text, translations
162
+
163
  # Test the translation
164
  text = "Текст для перевода"
165
  print(translate(text))