KoichiYasuoka
committed on
Commit
•
4861a26
1
Parent(s):
b853393
model improved
Browse files
- config.json +3 -25
- pytorch_model.bin +2 -2
- special_tokens_map.json +51 -1
- supar.model +2 -2
- tokenizer.json +2 -1
- tokenizer_config.json +61 -1
config.json
CHANGED
@@ -265,8 +265,7 @@
|
|
265 |
"252": "VERB+PART",
|
266 |
"253": "VERB+PUNCT",
|
267 |
"254": "VERB+VERB",
|
268 |
-
"255": "X"
|
269 |
-
"256": "X+X"
|
270 |
},
|
271 |
"initializer_range": 0.02,
|
272 |
"intermediate_size": 3072,
|
@@ -526,8 +525,7 @@
|
|
526 |
"VERB+PART": 252,
|
527 |
"VERB+PUNCT": 253,
|
528 |
"VERB+VERB": 254,
|
529 |
-
"X": 255
|
530 |
-
"X+X": 256
|
531 |
},
|
532 |
"layer_norm_eps": 1e-12,
|
533 |
"max_position_embeddings": 512,
|
@@ -1375,10 +1373,6 @@
|
|
1375 |
"\u0e42\u0e14\u0e22\u0e40\u0e2a\u0e23\u0e34\u0e21": [
|
1376 |
"\u0e42\u0e14\u0e22",
|
1377 |
"\u0e40\u0e2a\u0e23\u0e34\u0e21"
|
1378 |
-
],
|
1379 |
-
"\u0e42\u0e14\u0e22\u0e40\u0e40\u0e1a\u0e48\u0e07": [
|
1380 |
-
"\u0e42\u0e14\u0e22",
|
1381 |
-
"\u0e40\u0e40\u0e1a\u0e48\u0e07"
|
1382 |
]
|
1383 |
},
|
1384 |
"DET+DET": {
|
@@ -4814,10 +4808,6 @@
|
|
4814 |
"\u0e27\u0e38\u0e12\u0e34\u0e2a\u0e20\u0e32",
|
4815 |
"\u0e1e.\u0e28."
|
4816 |
],
|
4817 |
-
"\u0e2a\u0e38\u0e23\u0e2a\u0e31\u0e08\u0e08\u0e30": [
|
4818 |
-
"\u0e2a\u0e38\u0e23\u0e2a\u0e31\u0e08\u0e08",
|
4819 |
-
"\u0e30"
|
4820 |
-
],
|
4821 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36\u0e01": [
|
4822 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36",
|
4823 |
"\u0e01"
|
@@ -6352,10 +6342,6 @@
|
|
6352 |
"\u0e1a\u0e23\u0e34\u0e01\u0e32\u0e23",
|
6353 |
"\u0e40\u0e0a\u0e37\u0e49\u0e2d"
|
6354 |
],
|
6355 |
-
"\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e17\u0e32\u0e07": [
|
6356 |
-
"\u0e1b\u0e23\u0e30\u0e08",
|
6357 |
-
"\u0e33\u0e17\u0e32\u0e07"
|
6358 |
-
],
|
6359 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c": [
|
6360 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21",
|
6361 |
"\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c"
|
@@ -6852,10 +6838,6 @@
|
|
6852 |
"\u0e23\u0e31\u0e01\u0e29\u0e32\u0e01\u0e32\u0e23",
|
6853 |
"\u0e40\u0e09\u0e1e\u0e32\u0e30"
|
6854 |
],
|
6855 |
-
"\u0e23\u0e31\u0e1a\u0e1c\u0e34\u0e14\u0e0a\u0e2d\u0e1a": [
|
6856 |
-
"\u0e23",
|
6857 |
-
"\u0e31\u0e1a\u0e1c\u0e34\u0e14\u0e0a\u0e2d\u0e1a"
|
6858 |
-
],
|
6859 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07": [
|
6860 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01",
|
6861 |
"\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07"
|
@@ -7062,17 +7044,13 @@
|
|
7062 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a\u0e04\u0e23\u0e38\u0e48\u0e19": [
|
7063 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a",
|
7064 |
"\u0e04\u0e23\u0e38\u0e48\u0e19"
|
7065 |
-
],
|
7066 |
-
"\u0e42\u0e1b\u0e23": [
|
7067 |
-
"\u0e42",
|
7068 |
-
"\u0e1b\u0e23"
|
7069 |
]
|
7070 |
}
|
7071 |
}
|
7072 |
},
|
7073 |
"tokenizer_class": "RemBertTokenizerFast",
|
7074 |
"torch_dtype": "float32",
|
7075 |
-
"transformers_version": "4.
|
7076 |
"type_vocab_size": 2,
|
7077 |
"use_cache": true,
|
7078 |
"vocab_size": 3005
|
|
|
265 |
"252": "VERB+PART",
|
266 |
"253": "VERB+PUNCT",
|
267 |
"254": "VERB+VERB",
|
268 |
+
"255": "X"
|
|
|
269 |
},
|
270 |
"initializer_range": 0.02,
|
271 |
"intermediate_size": 3072,
|
|
|
525 |
"VERB+PART": 252,
|
526 |
"VERB+PUNCT": 253,
|
527 |
"VERB+VERB": 254,
|
528 |
+
"X": 255
|
|
|
529 |
},
|
530 |
"layer_norm_eps": 1e-12,
|
531 |
"max_position_embeddings": 512,
|
|
|
1373 |
"\u0e42\u0e14\u0e22\u0e40\u0e2a\u0e23\u0e34\u0e21": [
|
1374 |
"\u0e42\u0e14\u0e22",
|
1375 |
"\u0e40\u0e2a\u0e23\u0e34\u0e21"
|
|
|
|
|
|
|
|
|
1376 |
]
|
1377 |
},
|
1378 |
"DET+DET": {
|
|
|
4808 |
"\u0e27\u0e38\u0e12\u0e34\u0e2a\u0e20\u0e32",
|
4809 |
"\u0e1e.\u0e28."
|
4810 |
],
|
|
|
|
|
|
|
|
|
4811 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36\u0e01": [
|
4812 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36",
|
4813 |
"\u0e01"
|
|
|
6342 |
"\u0e1a\u0e23\u0e34\u0e01\u0e32\u0e23",
|
6343 |
"\u0e40\u0e0a\u0e37\u0e49\u0e2d"
|
6344 |
],
|
|
|
|
|
|
|
|
|
6345 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c": [
|
6346 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21",
|
6347 |
"\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c"
|
|
|
6838 |
"\u0e23\u0e31\u0e01\u0e29\u0e32\u0e01\u0e32\u0e23",
|
6839 |
"\u0e40\u0e09\u0e1e\u0e32\u0e30"
|
6840 |
],
|
|
|
|
|
|
|
|
|
6841 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07": [
|
6842 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01",
|
6843 |
"\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07"
|
|
|
7044 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a\u0e04\u0e23\u0e38\u0e48\u0e19": [
|
7045 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a",
|
7046 |
"\u0e04\u0e23\u0e38\u0e48\u0e19"
|
|
|
|
|
|
|
|
|
7047 |
]
|
7048 |
}
|
7049 |
}
|
7050 |
},
|
7051 |
"tokenizer_class": "RemBertTokenizerFast",
|
7052 |
"torch_dtype": "float32",
|
7053 |
+
"transformers_version": "4.40.1",
|
7054 |
"type_vocab_size": 2,
|
7055 |
"use_cache": true,
|
7056 |
"vocab_size": 3005
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fbc8c947cdf772a281a527db16a660817e9d0114ddfca69e39050de3e276b22
|
3 |
+
size 351890598
|
special_tokens_map.json
CHANGED
@@ -1 +1,51 @@
|
|
1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "[CLS]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "[SEP]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "[MASK]",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": true,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "[PAD]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "[SEP]",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "[UNK]",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
supar.model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b27af124535b799117cc9fd151f54186c2a05a26db7cf5190e086164fcda807
|
3 |
+
size 399959990
|
tokenizer.json
CHANGED
@@ -12167,6 +12167,7 @@
|
|
12167 |
"õ",
|
12168 |
-16.647894589647784
|
12169 |
]
|
12170 |
-
]
|
|
|
12171 |
}
|
12172 |
}
|
|
|
12167 |
"õ",
|
12168 |
-16.647894589647784
|
12169 |
]
|
12170 |
+
],
|
12171 |
+
"byte_fallback": false
|
12172 |
}
|
12173 |
}
|
tokenizer_config.json
CHANGED
@@ -1 +1,61 @@
|
|
1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[CLS]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[PAD]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[SEP]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[UNK]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"4": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": true,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"bos_token": "[CLS]",
|
45 |
+
"clean_up_tokenization_spaces": true,
|
46 |
+
"cls_token": "[CLS]",
|
47 |
+
"do_lower_case": false,
|
48 |
+
"eos_token": "[SEP]",
|
49 |
+
"keep_accents": true,
|
50 |
+
"mask_token": "[MASK]",
|
51 |
+
"max_length": 510,
|
52 |
+
"model_max_length": 512,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"remove_space": true,
|
55 |
+
"sep_token": "[SEP]",
|
56 |
+
"stride": 0,
|
57 |
+
"tokenizer_class": "RemBertTokenizerFast",
|
58 |
+
"truncation_side": "right",
|
59 |
+
"truncation_strategy": "longest_first",
|
60 |
+
"unk_token": "[UNK]"
|
61 |
+
}
|