Spaces:
Runtime error
Runtime error
bayartsogt
committed on
Commit
•
0afb4f9
1
Parent(s):
3fff62f
trust_remote_code=True
Browse files
- app.py +1 -1
- test_transformers.py +12 -0
app.py
CHANGED
@@ -19,7 +19,7 @@ openai_tokenizer_list = [
|
|
19 |
|
20 |
# load tokenizers
|
21 |
hf_tokenizers = [
|
22 |
-
AutoTokenizer.from_pretrained(model_name_or_id, use_fast=use_fast)
|
23 |
for model_name_or_id, use_fast in hf_tokenizer_list
|
24 |
]
|
25 |
|
|
|
19 |
|
20 |
# load tokenizers
|
21 |
hf_tokenizers = [
|
22 |
+
AutoTokenizer.from_pretrained(model_name_or_id, use_fast=use_fast, trust_remote_code=True)
|
23 |
for model_name_or_id, use_fast in hf_tokenizer_list
|
24 |
]
|
25 |
|
test_transformers.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoTokenizer
|
2 |
+
|
3 |
+
|
4 |
+
tokenizer = AutoTokenizer.from_pretrained('tugstugi/bert-large-mongolian-cased', use_fast=False)
|
5 |
+
|
6 |
+
|
7 |
+
test_input = "Мөнгөө тушаачихсаныхаа дараа мэдэгдээрэй"
|
8 |
+
|
9 |
+
print("input:", test_input)
|
10 |
+
print("tokenizer.encode()", tokenizer.encode(test_input))
|
11 |
+
print("tokenizer decode", [(tokenizer.decode(token_id), token_id) for token_id in tokenizer.encode(test_input)])
|
12 |
+
print("tokenizer()", tokenizer(test_input))
|