Update spaCy pipeline
Browse files- README.md +0 -0
- config.cfg +13 -33
- grc_proiel_trf-any-py3-none-any.whl +2 -2
- lemmatizer/cfg +0 -0
- lemmatizer/model +2 -2
- lemmatizer/trees +2 -2
- meta.json +150 -149
- morphologizer/cfg +4 -0
- morphologizer/model +2 -2
- parser/model +2 -2
- parser/moves +1 -1
- tagger/cfg +2 -1
- tagger/model +0 -0
- transformer/model +2 -2
- vocab/strings.json +2 -2
- vocab/vectors +2 -2
- vocab/vectors.cfg +1 -8
README.md
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
config.cfg
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
[paths]
|
2 |
-
train = "corpus/train/grc_proiel-ud-train.spacy"
|
3 |
-
dev = "corpus/dev/grc_proiel-ud-dev.spacy"
|
4 |
-
vectors = "vectors/large"
|
5 |
init_tok2vec = null
|
|
|
6 |
|
7 |
[system]
|
8 |
gpu_allocator = "pytorch"
|
@@ -10,7 +10,7 @@ seed = 1
|
|
10 |
|
11 |
[nlp]
|
12 |
lang = "grc"
|
13 |
-
pipeline = ["transformer","morphologizer","tagger","parser","
|
14 |
batch_size = 128
|
15 |
disabled = []
|
16 |
before_creation = null
|
@@ -89,26 +89,6 @@ grad_factor = 1.0
|
|
89 |
pooling = {"@layers":"reduce_mean.v1"}
|
90 |
upstream = "transformer"
|
91 |
|
92 |
-
[components.senter]
|
93 |
-
factory = "senter"
|
94 |
-
overwrite = false
|
95 |
-
scorer = {"@scorers":"spacy.senter_scorer.v1"}
|
96 |
-
|
97 |
-
[components.senter.model]
|
98 |
-
@architectures = "spacy.Tagger.v2"
|
99 |
-
nO = null
|
100 |
-
normalize = false
|
101 |
-
|
102 |
-
[components.senter.model.tok2vec]
|
103 |
-
@architectures = "spacy.HashEmbedCNN.v2"
|
104 |
-
pretrained_vectors = true
|
105 |
-
width = 12
|
106 |
-
depth = 1
|
107 |
-
embed_size = 2000
|
108 |
-
window_size = 1
|
109 |
-
maxout_pieces = 2
|
110 |
-
subword_features = true
|
111 |
-
|
112 |
[components.tagger]
|
113 |
factory = "tagger"
|
114 |
label_smoothing = 0.0
|
@@ -133,7 +113,7 @@ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotati
|
|
133 |
|
134 |
[components.transformer.model]
|
135 |
@architectures = "spacy-transformers.TransformerModel.v3"
|
136 |
-
name = "
|
137 |
mixed_precision = false
|
138 |
|
139 |
[components.transformer.model.get_spans]
|
@@ -178,7 +158,7 @@ max_epochs = 0
|
|
178 |
max_steps = 20000
|
179 |
eval_frequency = 200
|
180 |
frozen_components = ["lemmatizer"]
|
181 |
-
annotating_components = [
|
182 |
before_to_disk = null
|
183 |
before_update = null
|
184 |
|
@@ -215,27 +195,27 @@ total_steps = 20000
|
|
215 |
initial_rate = 0.00005
|
216 |
|
217 |
[training.score_weights]
|
218 |
-
pos_acc = 0.
|
219 |
-
morph_acc = 0.
|
220 |
morph_per_feat = null
|
221 |
-
tag_acc = 0.
|
222 |
-
dep_uas = 0.
|
223 |
-
dep_las = 0.
|
224 |
dep_las_per_type = null
|
225 |
sents_p = null
|
226 |
sents_r = null
|
227 |
sents_f = 0.0
|
228 |
-
lemma_acc = 0.
|
229 |
|
230 |
[pretraining]
|
231 |
|
232 |
[initialize]
|
233 |
-
vectors = ${paths.vectors}
|
234 |
init_tok2vec = ${paths.init_tok2vec}
|
235 |
vocab_data = null
|
236 |
lookups = null
|
237 |
before_init = null
|
238 |
after_init = null
|
|
|
239 |
|
240 |
[initialize.components]
|
241 |
|
|
|
1 |
[paths]
|
2 |
+
train = "corpus/proiel/train/grc_proiel-ud-train.spacy"
|
3 |
+
dev = "corpus/proiel/dev/grc_proiel-ud-dev.spacy"
|
|
|
4 |
init_tok2vec = null
|
5 |
+
vectors = null
|
6 |
|
7 |
[system]
|
8 |
gpu_allocator = "pytorch"
|
|
|
10 |
|
11 |
[nlp]
|
12 |
lang = "grc"
|
13 |
+
pipeline = ["transformer","morphologizer","tagger","parser","lemmatizer","attribute_ruler"]
|
14 |
batch_size = 128
|
15 |
disabled = []
|
16 |
before_creation = null
|
|
|
89 |
pooling = {"@layers":"reduce_mean.v1"}
|
90 |
upstream = "transformer"
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
[components.tagger]
|
93 |
factory = "tagger"
|
94 |
label_smoothing = 0.0
|
|
|
113 |
|
114 |
[components.transformer.model]
|
115 |
@architectures = "spacy-transformers.TransformerModel.v3"
|
116 |
+
name = "cabrooks/LOGION-50k_wordpiece"
|
117 |
mixed_precision = false
|
118 |
|
119 |
[components.transformer.model.get_spans]
|
|
|
158 |
max_steps = 20000
|
159 |
eval_frequency = 200
|
160 |
frozen_components = ["lemmatizer"]
|
161 |
+
annotating_components = []
|
162 |
before_to_disk = null
|
163 |
before_update = null
|
164 |
|
|
|
195 |
initial_rate = 0.00005
|
196 |
|
197 |
[training.score_weights]
|
198 |
+
pos_acc = 0.06
|
199 |
+
morph_acc = 0.06
|
200 |
morph_per_feat = null
|
201 |
+
tag_acc = 0.15
|
202 |
+
dep_uas = 0.06
|
203 |
+
dep_las = 0.06
|
204 |
dep_las_per_type = null
|
205 |
sents_p = null
|
206 |
sents_r = null
|
207 |
sents_f = 0.0
|
208 |
+
lemma_acc = 0.61
|
209 |
|
210 |
[pretraining]
|
211 |
|
212 |
[initialize]
|
|
|
213 |
init_tok2vec = ${paths.init_tok2vec}
|
214 |
vocab_data = null
|
215 |
lookups = null
|
216 |
before_init = null
|
217 |
after_init = null
|
218 |
+
vectors = ${paths.vectors}
|
219 |
|
220 |
[initialize.components]
|
221 |
|
grc_proiel_trf-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:555cdbb17adee883902051334423989b70065cbbdad0c37b73e175caacd9ef2b
|
3 |
+
size 495436349
|
lemmatizer/cfg
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
lemmatizer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee3567720a57a22b28099f9ccd9c39ee5b455f448e531763e00d59219f0b4671
|
3 |
+
size 27089840
|
lemmatizer/trees
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a53607dd846f3e82212b030f5fde5e8487e55fe146f7f90da6078e87837bc0a
|
3 |
+
size 6009899
|
meta.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"lang":"grc",
|
3 |
"name":"proiel_trf",
|
4 |
-
"version":"3.7",
|
5 |
"description":"",
|
6 |
"author":"",
|
7 |
"email":"",
|
@@ -10,10 +10,10 @@
|
|
10 |
"spacy_version":">=3.7.4,<3.8.0",
|
11 |
"spacy_git_version":"bff8725f4",
|
12 |
"vectors":{
|
13 |
-
"width":
|
14 |
-
"vectors":
|
15 |
-
"keys"
|
16 |
-
"name":
|
17 |
},
|
18 |
"labels":{
|
19 |
"transformer":[
|
@@ -45,6 +45,7 @@
|
|
45 |
"Case=Acc|Gender=Fem|Number=Sing|POS=NOUN",
|
46 |
"Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
|
47 |
"Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp",
|
|
|
48 |
"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem",
|
49 |
"Case=Nom|Gender=Masc|Number=Plur|POS=NOUN",
|
50 |
"Case=Acc|Gender=Masc|Number=Plur|POS=NOUN",
|
@@ -727,6 +728,7 @@
|
|
727 |
"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act",
|
728 |
"Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
|
729 |
"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ",
|
|
|
730 |
"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass",
|
731 |
"Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN",
|
732 |
"Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN",
|
@@ -1073,7 +1075,8 @@
|
|
1073 |
"Px",
|
1074 |
"R-",
|
1075 |
"S-",
|
1076 |
-
"V-"
|
|
|
1077 |
],
|
1078 |
"parser":[
|
1079 |
"ROOT",
|
@@ -1105,6 +1108,7 @@
|
|
1105 |
"obl:agent",
|
1106 |
"orphan",
|
1107 |
"parataxis",
|
|
|
1108 |
"vocative",
|
1109 |
"xcomp"
|
1110 |
],
|
@@ -1117,7 +1121,6 @@
|
|
1117 |
"morphologizer",
|
1118 |
"tagger",
|
1119 |
"parser",
|
1120 |
-
"senter",
|
1121 |
"lemmatizer",
|
1122 |
"attribute_ruler"
|
1123 |
],
|
@@ -1126,7 +1129,6 @@
|
|
1126 |
"morphologizer",
|
1127 |
"tagger",
|
1128 |
"parser",
|
1129 |
-
"senter",
|
1130 |
"lemmatizer",
|
1131 |
"attribute_ruler"
|
1132 |
],
|
@@ -1134,33 +1136,33 @@
|
|
1134 |
|
1135 |
],
|
1136 |
"performance":{
|
1137 |
-
"pos_acc":0.
|
1138 |
-
"morph_acc":0.
|
1139 |
"morph_per_feat":{
|
1140 |
"Case":{
|
1141 |
-
"p":0.
|
1142 |
-
"r":0.
|
1143 |
-
"f":0.
|
1144 |
},
|
1145 |
"Gender":{
|
1146 |
-
"p":0.
|
1147 |
-
"r":0.
|
1148 |
-
"f":0.
|
1149 |
},
|
1150 |
"Number":{
|
1151 |
-
"p":0.
|
1152 |
-
"r":0.
|
1153 |
-
"f":0.
|
1154 |
},
|
1155 |
"Person":{
|
1156 |
-
"p":0.
|
1157 |
-
"r":0.
|
1158 |
-
"f":0.
|
1159 |
},
|
1160 |
"PronType":{
|
1161 |
-
"p":0.
|
1162 |
-
"r":0.
|
1163 |
-
"f":0.
|
1164 |
},
|
1165 |
"Polarity":{
|
1166 |
"p":1.0,
|
@@ -1168,39 +1170,39 @@
|
|
1168 |
"f":0.9895287958
|
1169 |
},
|
1170 |
"Aspect":{
|
1171 |
-
"p":0.
|
1172 |
-
"r":0.
|
1173 |
-
"f":0.
|
1174 |
},
|
1175 |
"Mood":{
|
1176 |
-
"p":0.
|
1177 |
-
"r":0.
|
1178 |
-
"f":0.
|
1179 |
},
|
1180 |
"Tense":{
|
1181 |
-
"p":0.
|
1182 |
-
"r":0.
|
1183 |
-
"f":0.
|
1184 |
},
|
1185 |
"VerbForm":{
|
1186 |
-
"p":0.
|
1187 |
-
"r":0.
|
1188 |
-
"f":0.
|
1189 |
},
|
1190 |
"Voice":{
|
1191 |
-
"p":0.
|
1192 |
-
"r":0.
|
1193 |
-
"f":0.
|
1194 |
},
|
1195 |
"Degree":{
|
1196 |
-
"p":0.
|
1197 |
-
"r":0.
|
1198 |
-
"f":0.
|
1199 |
},
|
1200 |
"Definite":{
|
1201 |
-
"p":0.
|
1202 |
-
"r":0.
|
1203 |
-
"f":0.
|
1204 |
},
|
1205 |
"Reflex":{
|
1206 |
"p":1.0,
|
@@ -1209,163 +1211,163 @@
|
|
1209 |
},
|
1210 |
"Poss":{
|
1211 |
"p":1.0,
|
1212 |
-
"r":0.
|
1213 |
-
"f":0.
|
1214 |
}
|
1215 |
},
|
1216 |
-
"tag_acc":0.
|
1217 |
-
"dep_uas":0.
|
1218 |
-
"dep_las":0.
|
1219 |
"dep_las_per_type":{
|
1220 |
"nsubj":{
|
1221 |
-
"p":0.
|
1222 |
-
"r":0.
|
1223 |
-
"f":0.
|
1224 |
},
|
1225 |
"discourse":{
|
1226 |
-
"p":0.
|
1227 |
-
"r":0.
|
1228 |
-
"f":0.
|
1229 |
},
|
1230 |
"mark":{
|
1231 |
-
"p":0.
|
1232 |
-
"r":0.
|
1233 |
-
"f":0.
|
1234 |
},
|
1235 |
"advmod":{
|
1236 |
-
"p":0.
|
1237 |
-
"r":0.
|
1238 |
-
"f":0.
|
1239 |
},
|
1240 |
"advcl":{
|
1241 |
-
"p":0.
|
1242 |
-
"r":0.
|
1243 |
-
"f":0.
|
1244 |
},
|
1245 |
"xcomp":{
|
1246 |
-
"p":0.
|
1247 |
-
"r":0.
|
1248 |
-
"f":0.
|
1249 |
},
|
1250 |
"cop":{
|
1251 |
-
"p":0.
|
1252 |
-
"r":0.
|
1253 |
-
"f":0.
|
1254 |
},
|
1255 |
"root":{
|
1256 |
-
"p":0.
|
1257 |
-
"r":0.
|
1258 |
-
"f":0.
|
1259 |
},
|
1260 |
"det":{
|
1261 |
-
"p":0.
|
1262 |
-
"r":0.
|
1263 |
-
"f":0.
|
1264 |
},
|
1265 |
"nmod":{
|
1266 |
-
"p":0.
|
1267 |
-
"r":0.
|
1268 |
-
"f":0.
|
1269 |
},
|
1270 |
"obj":{
|
1271 |
-
"p":0.
|
1272 |
-
"r":0.
|
1273 |
-
"f":0.
|
1274 |
},
|
1275 |
"case":{
|
1276 |
-
"p":0.
|
1277 |
-
"r":0.
|
1278 |
-
"f":0.
|
1279 |
},
|
1280 |
"obl":{
|
1281 |
-
"p":0.
|
1282 |
-
"r":0.
|
1283 |
-
"f":0.
|
1284 |
},
|
1285 |
"cc":{
|
1286 |
-
"p":0.
|
1287 |
-
"r":0.
|
1288 |
-
"f":0.
|
1289 |
},
|
1290 |
"conj":{
|
1291 |
-
"p":0.
|
1292 |
-
"r":0.
|
1293 |
-
"f":0.
|
1294 |
},
|
1295 |
"obl:agent":{
|
1296 |
-
"p":0.
|
1297 |
-
"r":0.
|
1298 |
-
"f":0.
|
1299 |
},
|
1300 |
"ccomp":{
|
1301 |
-
"p":0.
|
1302 |
-
"r":0.
|
1303 |
-
"f":0.
|
1304 |
},
|
1305 |
"nsubj:pass":{
|
1306 |
-
"p":0.
|
1307 |
-
"r":0.
|
1308 |
-
"f":0.
|
1309 |
},
|
1310 |
"amod":{
|
1311 |
-
"p":0.
|
1312 |
-
"r":0.
|
1313 |
-
"f":0.
|
1314 |
},
|
1315 |
"acl":{
|
1316 |
-
"p":0.
|
1317 |
"r":0.5151515152,
|
1318 |
-
"f":0.
|
1319 |
},
|
1320 |
"iobj":{
|
1321 |
-
"p":0.
|
1322 |
-
"r":0.
|
1323 |
-
"f":0.
|
1324 |
},
|
1325 |
"nummod":{
|
1326 |
-
"p":0.
|
1327 |
-
"r":0.
|
1328 |
-
"f":0.
|
1329 |
},
|
1330 |
"vocative":{
|
1331 |
-
"p":0.
|
1332 |
-
"r":0.
|
1333 |
-
"f":0.
|
1334 |
},
|
1335 |
"orphan":{
|
1336 |
-
"p":0.
|
1337 |
-
"r":0.
|
1338 |
-
"f":0.
|
1339 |
},
|
1340 |
"appos":{
|
1341 |
-
"p":0.
|
1342 |
-
"r":0.
|
1343 |
-
"f":0.
|
1344 |
},
|
1345 |
-
"
|
1346 |
-
"p":0.
|
1347 |
-
"r":0.
|
1348 |
-
"f":0.
|
1349 |
},
|
1350 |
"dep":{
|
1351 |
"p":0.0,
|
1352 |
"r":0.0,
|
1353 |
"f":0.0
|
1354 |
},
|
1355 |
-
"
|
1356 |
-
"p":0.
|
1357 |
-
"r":0.
|
1358 |
-
"f":0.
|
1359 |
},
|
1360 |
"csubj:pass":{
|
1361 |
-
"p":0.
|
1362 |
-
"r":0.
|
1363 |
-
"f":0.
|
1364 |
},
|
1365 |
"flat:name":{
|
1366 |
-
"p":0.
|
1367 |
"r":0.5909090909,
|
1368 |
-
"f":0.
|
1369 |
},
|
1370 |
"aux:pass":{
|
1371 |
"p":0.0,
|
@@ -1374,8 +1376,8 @@
|
|
1374 |
},
|
1375 |
"fixed":{
|
1376 |
"p":1.0,
|
1377 |
-
"r":0.
|
1378 |
-
"f":0.
|
1379 |
},
|
1380 |
"aux":{
|
1381 |
"p":0.0,
|
@@ -1383,15 +1385,14 @@
|
|
1383 |
"f":0.0
|
1384 |
}
|
1385 |
},
|
1386 |
-
"sents_p":0.
|
1387 |
-
"sents_r":0.
|
1388 |
-
"sents_f":0.
|
1389 |
-
"lemma_acc":0.
|
1390 |
-
"transformer_loss":
|
1391 |
-
"morphologizer_loss":
|
1392 |
-
"tagger_loss":
|
1393 |
-
"parser_loss":
|
1394 |
-
"senter_loss":25970.2717437744
|
1395 |
},
|
1396 |
"requirements":[
|
1397 |
"spacy-transformers>=1.3.4,<1.4.0"
|
|
|
1 |
{
|
2 |
"lang":"grc",
|
3 |
"name":"proiel_trf",
|
4 |
+
"version":"3.7.4",
|
5 |
"description":"",
|
6 |
"author":"",
|
7 |
"email":"",
|
|
|
10 |
"spacy_version":">=3.7.4,<3.8.0",
|
11 |
"spacy_git_version":"bff8725f4",
|
12 |
"vectors":{
|
13 |
+
"width":0,
|
14 |
+
"vectors":0,
|
15 |
+
"keys":0,
|
16 |
+
"name":null
|
17 |
},
|
18 |
"labels":{
|
19 |
"transformer":[
|
|
|
45 |
"Case=Acc|Gender=Fem|Number=Sing|POS=NOUN",
|
46 |
"Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
|
47 |
"Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp",
|
48 |
+
"POS=PUNCT",
|
49 |
"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem",
|
50 |
"Case=Nom|Gender=Masc|Number=Plur|POS=NOUN",
|
51 |
"Case=Acc|Gender=Masc|Number=Plur|POS=NOUN",
|
|
|
728 |
"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act",
|
729 |
"Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
|
730 |
"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ",
|
731 |
+
"POS=AUX",
|
732 |
"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass",
|
733 |
"Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN",
|
734 |
"Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN",
|
|
|
1075 |
"Px",
|
1076 |
"R-",
|
1077 |
"S-",
|
1078 |
+
"V-",
|
1079 |
+
"Z"
|
1080 |
],
|
1081 |
"parser":[
|
1082 |
"ROOT",
|
|
|
1108 |
"obl:agent",
|
1109 |
"orphan",
|
1110 |
"parataxis",
|
1111 |
+
"punct",
|
1112 |
"vocative",
|
1113 |
"xcomp"
|
1114 |
],
|
|
|
1121 |
"morphologizer",
|
1122 |
"tagger",
|
1123 |
"parser",
|
|
|
1124 |
"lemmatizer",
|
1125 |
"attribute_ruler"
|
1126 |
],
|
|
|
1129 |
"morphologizer",
|
1130 |
"tagger",
|
1131 |
"parser",
|
|
|
1132 |
"lemmatizer",
|
1133 |
"attribute_ruler"
|
1134 |
],
|
|
|
1136 |
|
1137 |
],
|
1138 |
"performance":{
|
1139 |
+
"pos_acc":0.985754209,
|
1140 |
+
"morph_acc":0.9466975666,
|
1141 |
"morph_per_feat":{
|
1142 |
"Case":{
|
1143 |
+
"p":0.9866161616,
|
1144 |
+
"r":0.9861181222,
|
1145 |
+
"f":0.986367079
|
1146 |
},
|
1147 |
"Gender":{
|
1148 |
+
"p":0.9454891995,
|
1149 |
+
"r":0.9494704606,
|
1150 |
+
"f":0.9474756478
|
1151 |
},
|
1152 |
"Number":{
|
1153 |
+
"p":0.9960678808,
|
1154 |
+
"r":0.9953469135,
|
1155 |
+
"f":0.9957072666
|
1156 |
},
|
1157 |
"Person":{
|
1158 |
+
"p":0.9922423347,
|
1159 |
+
"r":0.9915097822,
|
1160 |
+
"f":0.9918759232
|
1161 |
},
|
1162 |
"PronType":{
|
1163 |
+
"p":0.9921824104,
|
1164 |
+
"r":0.988961039,
|
1165 |
+
"f":0.9905691057
|
1166 |
},
|
1167 |
"Polarity":{
|
1168 |
"p":1.0,
|
|
|
1170 |
"f":0.9895287958
|
1171 |
},
|
1172 |
"Aspect":{
|
1173 |
+
"p":0.9857305936,
|
1174 |
+
"r":0.9801362089,
|
1175 |
+
"f":0.9829254411
|
1176 |
},
|
1177 |
"Mood":{
|
1178 |
+
"p":0.9913990826,
|
1179 |
+
"r":0.9896966228,
|
1180 |
+
"f":0.9905471212
|
1181 |
},
|
1182 |
"Tense":{
|
1183 |
+
"p":0.9845467033,
|
1184 |
+
"r":0.9848849193,
|
1185 |
+
"f":0.9847157822
|
1186 |
},
|
1187 |
"VerbForm":{
|
1188 |
+
"p":0.9969104016,
|
1189 |
+
"r":0.9969104016,
|
1190 |
+
"f":0.9969104016
|
1191 |
},
|
1192 |
"Voice":{
|
1193 |
+
"p":0.9776785714,
|
1194 |
+
"r":0.978014428,
|
1195 |
+
"f":0.9778464709
|
1196 |
},
|
1197 |
"Degree":{
|
1198 |
+
"p":0.9528936743,
|
1199 |
+
"r":0.9516129032,
|
1200 |
+
"f":0.9522528581
|
1201 |
},
|
1202 |
"Definite":{
|
1203 |
+
"p":0.9956896552,
|
1204 |
+
"r":0.9983792545,
|
1205 |
+
"f":0.9970326409
|
1206 |
},
|
1207 |
"Reflex":{
|
1208 |
"p":1.0,
|
|
|
1211 |
},
|
1212 |
"Poss":{
|
1213 |
"p":1.0,
|
1214 |
+
"r":0.9473684211,
|
1215 |
+
"f":0.972972973
|
1216 |
}
|
1217 |
},
|
1218 |
+
"tag_acc":0.9867084725,
|
1219 |
+
"dep_uas":0.8896213286,
|
1220 |
+
"dep_las":0.8552699041,
|
1221 |
"dep_las_per_type":{
|
1222 |
"nsubj":{
|
1223 |
+
"p":0.8678223185,
|
1224 |
+
"r":0.8678223185,
|
1225 |
+
"f":0.8678223185
|
1226 |
},
|
1227 |
"discourse":{
|
1228 |
+
"p":0.8641304348,
|
1229 |
+
"r":0.8736263736,
|
1230 |
+
"f":0.868852459
|
1231 |
},
|
1232 |
"mark":{
|
1233 |
+
"p":0.9047619048,
|
1234 |
+
"r":0.9193548387,
|
1235 |
+
"f":0.912
|
1236 |
},
|
1237 |
"advmod":{
|
1238 |
+
"p":0.8021638331,
|
1239 |
+
"r":0.7711738484,
|
1240 |
+
"f":0.7863636364
|
1241 |
},
|
1242 |
"advcl":{
|
1243 |
+
"p":0.8187830688,
|
1244 |
+
"r":0.8456284153,
|
1245 |
+
"f":0.8319892473
|
1246 |
},
|
1247 |
"xcomp":{
|
1248 |
+
"p":0.7142857143,
|
1249 |
+
"r":0.74,
|
1250 |
+
"f":0.7269155206
|
1251 |
},
|
1252 |
"cop":{
|
1253 |
+
"p":0.8248847926,
|
1254 |
+
"r":0.8364485981,
|
1255 |
+
"f":0.8306264501
|
1256 |
},
|
1257 |
"root":{
|
1258 |
+
"p":0.958863859,
|
1259 |
+
"r":0.9607458292,
|
1260 |
+
"f":0.9598039216
|
1261 |
},
|
1262 |
"det":{
|
1263 |
+
"p":0.9548022599,
|
1264 |
+
"r":0.9527320035,
|
1265 |
+
"f":0.9537660082
|
1266 |
},
|
1267 |
"nmod":{
|
1268 |
+
"p":0.7927927928,
|
1269 |
+
"r":0.7746478873,
|
1270 |
+
"f":0.7836153161
|
1271 |
},
|
1272 |
"obj":{
|
1273 |
+
"p":0.8841025641,
|
1274 |
+
"r":0.8951194185,
|
1275 |
+
"f":0.8895768834
|
1276 |
},
|
1277 |
"case":{
|
1278 |
+
"p":0.9704595186,
|
1279 |
+
"r":0.9725877193,
|
1280 |
+
"f":0.9715224535
|
1281 |
},
|
1282 |
"obl":{
|
1283 |
+
"p":0.800982801,
|
1284 |
+
"r":0.7960927961,
|
1285 |
+
"f":0.7985303123
|
1286 |
},
|
1287 |
"cc":{
|
1288 |
+
"p":0.8436213992,
|
1289 |
+
"r":0.8436213992,
|
1290 |
+
"f":0.8436213992
|
1291 |
},
|
1292 |
"conj":{
|
1293 |
+
"p":0.7836411609,
|
1294 |
+
"r":0.7795275591,
|
1295 |
+
"f":0.7815789474
|
1296 |
},
|
1297 |
"obl:agent":{
|
1298 |
+
"p":0.8461538462,
|
1299 |
+
"r":0.5945945946,
|
1300 |
+
"f":0.6984126984
|
1301 |
},
|
1302 |
"ccomp":{
|
1303 |
+
"p":0.7434554974,
|
1304 |
+
"r":0.7064676617,
|
1305 |
+
"f":0.7244897959
|
1306 |
},
|
1307 |
"nsubj:pass":{
|
1308 |
+
"p":0.7543859649,
|
1309 |
+
"r":0.8037383178,
|
1310 |
+
"f":0.778280543
|
1311 |
},
|
1312 |
"amod":{
|
1313 |
+
"p":0.8118811881,
|
1314 |
+
"r":0.7699530516,
|
1315 |
+
"f":0.7903614458
|
1316 |
},
|
1317 |
"acl":{
|
1318 |
+
"p":0.5448717949,
|
1319 |
"r":0.5151515152,
|
1320 |
+
"f":0.5295950156
|
1321 |
},
|
1322 |
"iobj":{
|
1323 |
+
"p":0.8048780488,
|
1324 |
+
"r":0.8383371824,
|
1325 |
+
"f":0.8212669683
|
1326 |
},
|
1327 |
"nummod":{
|
1328 |
+
"p":0.873015873,
|
1329 |
+
"r":0.8088235294,
|
1330 |
+
"f":0.8396946565
|
1331 |
},
|
1332 |
"vocative":{
|
1333 |
+
"p":0.935483871,
|
1334 |
+
"r":0.8405797101,
|
1335 |
+
"f":0.8854961832
|
1336 |
},
|
1337 |
"orphan":{
|
1338 |
+
"p":0.3913043478,
|
1339 |
+
"r":0.2093023256,
|
1340 |
+
"f":0.2727272727
|
1341 |
},
|
1342 |
"appos":{
|
1343 |
+
"p":0.5384615385,
|
1344 |
+
"r":0.4468085106,
|
1345 |
+
"f":0.488372093
|
1346 |
},
|
1347 |
+
"dislocated":{
|
1348 |
+
"p":0.5714285714,
|
1349 |
+
"r":0.3076923077,
|
1350 |
+
"f":0.4
|
1351 |
},
|
1352 |
"dep":{
|
1353 |
"p":0.0,
|
1354 |
"r":0.0,
|
1355 |
"f":0.0
|
1356 |
},
|
1357 |
+
"parataxis":{
|
1358 |
+
"p":0.6666666667,
|
1359 |
+
"r":0.2,
|
1360 |
+
"f":0.3076923077
|
1361 |
},
|
1362 |
"csubj:pass":{
|
1363 |
+
"p":0.0,
|
1364 |
+
"r":0.0,
|
1365 |
+
"f":0.0
|
1366 |
},
|
1367 |
"flat:name":{
|
1368 |
+
"p":0.8666666667,
|
1369 |
"r":0.5909090909,
|
1370 |
+
"f":0.7027027027
|
1371 |
},
|
1372 |
"aux:pass":{
|
1373 |
"p":0.0,
|
|
|
1376 |
},
|
1377 |
"fixed":{
|
1378 |
"p":1.0,
|
1379 |
+
"r":0.7,
|
1380 |
+
"f":0.8235294118
|
1381 |
},
|
1382 |
"aux":{
|
1383 |
"p":0.0,
|
|
|
1385 |
"f":0.0
|
1386 |
}
|
1387 |
},
|
1388 |
+
"sents_p":0.9970617042,
|
1389 |
+
"sents_r":0.9990186457,
|
1390 |
+
"sents_f":0.9980392157,
|
1391 |
+
"lemma_acc":0.9653057051,
|
1392 |
+
"transformer_loss":443.1381021026,
|
1393 |
+
"morphologizer_loss":3.3029946992,
|
1394 |
+
"tagger_loss":1.3335578433,
|
1395 |
+
"parser_loss":19904.7449291433
|
|
|
1396 |
},
|
1397 |
"requirements":[
|
1398 |
"spacy-transformers>=1.3.4,<1.4.0"
|
morphologizer/cfg
CHANGED
@@ -27,6 +27,7 @@
|
|
27 |
"Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem|Number=Sing",
|
28 |
"Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":"Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
|
29 |
"Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":"Case=Dat|Gender=Masc|Number=Plur|PronType=Rcp",
|
|
|
30 |
"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|PronType=Dem",
|
31 |
"Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":"Case=Nom|Gender=Masc|Number=Plur",
|
32 |
"Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":"Case=Acc|Gender=Masc|Number=Plur",
|
@@ -709,6 +710,7 @@
|
|
709 |
"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act",
|
710 |
"Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":"Aspect=Perf|Mood=Sub|Number=Plur|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
|
711 |
"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing",
|
|
|
712 |
"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass",
|
713 |
"Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem,Masc|Number=Sing",
|
714 |
"Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Dat|Gender=Fem,Masc|Number=Sing",
|
@@ -1058,6 +1060,7 @@
|
|
1058 |
"Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":92,
|
1059 |
"Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":100,
|
1060 |
"Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":95,
|
|
|
1061 |
"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":90,
|
1062 |
"Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":92,
|
1063 |
"Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":92,
|
@@ -1740,6 +1743,7 @@
|
|
1740 |
"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":100,
|
1741 |
"Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":100,
|
1742 |
"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":84,
|
|
|
1743 |
"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":100,
|
1744 |
"Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
|
1745 |
"Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
|
|
|
27 |
"Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem|Number=Sing",
|
28 |
"Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":"Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
|
29 |
"Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":"Case=Dat|Gender=Masc|Number=Plur|PronType=Rcp",
|
30 |
+
"POS=PUNCT":"",
|
31 |
"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|PronType=Dem",
|
32 |
"Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":"Case=Nom|Gender=Masc|Number=Plur",
|
33 |
"Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":"Case=Acc|Gender=Masc|Number=Plur",
|
|
|
710 |
"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act",
|
711 |
"Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":"Aspect=Perf|Mood=Sub|Number=Plur|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
|
712 |
"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing",
|
713 |
+
"POS=AUX":"",
|
714 |
"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass",
|
715 |
"Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem,Masc|Number=Sing",
|
716 |
"Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Dat|Gender=Fem,Masc|Number=Sing",
|
|
|
1060 |
"Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":92,
|
1061 |
"Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":100,
|
1062 |
"Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":95,
|
1063 |
+
"POS=PUNCT":97,
|
1064 |
"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":90,
|
1065 |
"Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":92,
|
1066 |
"Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":92,
|
|
|
1743 |
"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":100,
|
1744 |
"Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":100,
|
1745 |
"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":84,
|
1746 |
+
"POS=AUX":87,
|
1747 |
"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":100,
|
1748 |
"Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
|
1749 |
"Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
|
morphologizer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cf8607c13ee0221b098e501600c2584bfd49f3a0400b02ca7018ffc9dc0a320
|
3 |
+
size 3172009
|
parser/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce04316becfea07f15b8b4ac961ae12cc9d1c628e917ea5847ccddfb34677c6d
|
3 |
+
size 1919319
|
parser/moves
CHANGED
@@ -1 +1 @@
|
|
1 |
-
��moves
|
|
|
1 |
+
��moves�?{"0":{"":99376},"1":{"":87659},"2":{"det":25097,"case":13178,"advmod":8281,"nsubj":8155,"discourse":7820,"advcl":5178,"obj":4646,"obl":4348,"mark":3054,"cc":2782,"iobj":1765,"cop":1464,"nmod":1391,"amod":1350,"nsubj:pass":966,"det||nsubj":961,"xcomp":917,"vocative":752,"nummod":549,"mark||advcl":409,"case||obl":395,"obj||advcl":366,"det||obj":353,"dislocated":261,"acl":254,"obl||advcl":245,"orphan":206,"nmod||nsubj":192,"nsubj||advcl":175,"ccomp":161,"nsubj||ccomp":158,"det||nsubj:pass":140,"advmod||advcl":136,"obj||xcomp":132,"obl:agent":126,"cc||advcl":121,"conj||advcl":118,"det||obl":115,"nmod||obj":108,"parataxis":106,"det||iobj":94,"amod||obj":88,"det||nmod":79,"xcomp||advcl":77,"amod||nsubj":75,"obj||ccomp":71,"iobj||advcl":70,"obl||xcomp":64,"iobj||xcomp":64,"advmod||xcomp":55,"advmod||ccomp":49,"appos||nsubj":47,"obl||ccomp":45,"ccomp||advcl":44,"det||advmod":42,"cc||nsubj":42,"nmod||obl":41,"advmod||advmod":39,"nsubj:pass||advcl":34,"iobj||ccomp":34,"amod||obl":30,"dep":0},"3":{"punct":15014,"conj":8819,"cc":8497,"obl":7446,"obj":6636,"nmod":5543,"nsubj":3918,"advcl":3876,"det":3859,"iobj":3825,"xcomp":2226,"ccomp":2144,"discourse":2105,"advmod":1915,"appos":1627,"acl":1443,"amod":1361,"cop":1355,"nsubj:pass":710,"orphan":452,"obl:agent":273,"flat:name":248,"vocative":243,"nummod":240,"acl||obj":174,"acl||nsubj":152,"fixed":148,"appos||nsubj":120,"csubj:pass":118,"nmod||obj":116,"conj||nsubj":113,"parataxis":110,"cc||nsubj":110,"nmod||nsubj":102,"conj||obj":101,"cc||obj":92,"appos||obj":84,"amod||obj":79,"case":69,"conj||obl":65,"cc||obl":63,"amod||nsubj":63,"dislocated":59,"det||obj":54,"acl||obl":53,"appos||obl":48,"conj||xcomp":46,"det||nsubj":45,"cop||xcomp":40,"iobj||xcomp":38,"conj||nmod":37,"obl||xcomp":35,"conj||iobj":35,"cc||nmod":35,"cop||ccomp":34,"cc||iobj":33,"cc||xcomp":32,"dep":0},"4":{"ROOT":15014}}�cfg��neg_key�
|
tagger/cfg
CHANGED
@@ -23,7 +23,8 @@
|
|
23 |
"Px",
|
24 |
"R-",
|
25 |
"S-",
|
26 |
-
"V-"
|
|
|
27 |
],
|
28 |
"neg_prefix":"!",
|
29 |
"overwrite":false
|
|
|
23 |
"Px",
|
24 |
"R-",
|
25 |
"S-",
|
26 |
+
"V-",
|
27 |
+
"Z"
|
28 |
],
|
29 |
"neg_prefix":"!",
|
30 |
"overwrite":false
|
tagger/model
CHANGED
Binary files a/tagger/model and b/tagger/model differ
|
|
transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:766b290e344b548ea87fae03eb56d5ce4303ceea9fd4081f6921adf08b6ec2e1
|
3 |
+
size 500074867
|
vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3726a679fe296082a7268b903f16b1a57d2a5451384596394aa60e1309164585
|
3 |
+
size 23343529
|
vocab/vectors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14772b683e726436d5948ad3fff2b43d036ef2ebbe3458aafed6004e05a40706
|
3 |
+
size 128
|
vocab/vectors.cfg
CHANGED
@@ -1,10 +1,3 @@
|
|
1 |
{
|
2 |
-
"mode":"
|
3 |
-
"minn":2,
|
4 |
-
"maxn":10,
|
5 |
-
"hash_count":2,
|
6 |
-
"hash_seed":2166136261,
|
7 |
-
"bow":"<",
|
8 |
-
"eow":">",
|
9 |
-
"attr":65
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"mode":"default"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
}
|