osanseviero committed on
Commit
5c423e0
•
1 Parent(s): 3bf6c38

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,47 +14,47 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.6744976507
18
  - name: NER Recall
19
  type: recall
20
- value: 0.7414285714
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.7063811967
24
  - task:
25
  name: POS
26
  type: token-classification
27
  metrics:
28
  - name: POS Accuracy
29
  type: accuracy
30
- value: 0.92444533
31
  - task:
32
  name: SENTER
33
  type: token-classification
34
  metrics:
35
  - name: SENTER Precision
36
  type: precision
37
- value: 0.6917940608
38
  - name: SENTER Recall
39
  type: recall
40
- value: 0.655402031
41
  - name: SENTER F Score
42
  type: f_score
43
- value: 0.6731065139
44
  - task:
45
  name: UNLABELED_DEPENDENCIES
46
  type: token-classification
47
  metrics:
48
  - name: Unlabeled Dependencies Accuracy
49
  type: accuracy
50
- value: 0.7661671924
51
  - task:
52
  name: LABELED_DEPENDENCIES
53
  type: token-classification
54
  metrics:
55
  - name: Labeled Dependencies Accuracy
56
  type: accuracy
57
- value: 0.7661671924
58
  ---
59
  ### Details: https://spacy.io/models/zh#zh_core_web_trf
60
 
@@ -63,8 +63,8 @@ Chinese transformer pipeline (bert-base-chinese). Components: transformer, tagge
63
  | Feature | Description |
64
  | --- | --- |
65
  | **Name** | `zh_core_web_trf` |
66
- | **Version** | `3.1.0` |
67
- | **spaCy** | `>=3.1.0,<3.2.0` |
68
  | **Default Pipeline** | `transformer`, `tagger`, `parser`, `attribute_ruler`, `ner` |
69
  | **Components** | `transformer`, `tagger`, `parser`, `attribute_ruler`, `ner` |
70
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -91,12 +91,15 @@ Chinese transformer pipeline (bert-base-chinese). Components: transformer, tagge
91
  | Type | Score |
92
  | --- | --- |
93
  | `TOKEN_ACC` | 97.88 |
94
- | `TAG_ACC` | 92.44 |
95
- | `DEP_UAS` | 76.62 |
96
- | `DEP_LAS` | 72.80 |
97
- | `ENTS_P` | 67.45 |
98
- | `ENTS_R` | 74.14 |
99
- | `ENTS_F` | 70.64 |
100
- | `SENTS_P` | 69.18 |
101
- | `SENTS_R` | 65.54 |
102
- | `SENTS_F` | 67.31 |
 
 
 
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.746365105
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.7615384615
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.7538754419
24
  - task:
25
  name: POS
26
  type: token-classification
27
  metrics:
28
  - name: POS Accuracy
29
  type: accuracy
30
+ value: 0.9247167985
31
  - task:
32
  name: SENTER
33
  type: token-classification
34
  metrics:
35
  - name: SENTER Precision
36
  type: precision
37
+ value: 0.7110739502
38
  - name: SENTER Recall
39
  type: recall
40
+ value: 0.6370900616
41
  - name: SENTER F Score
42
  type: f_score
43
+ value: 0.67205198
44
  - task:
45
  name: UNLABELED_DEPENDENCIES
46
  type: token-classification
47
  metrics:
48
  - name: Unlabeled Dependencies Accuracy
49
  type: accuracy
50
+ value: 0.7683558244
51
  - task:
52
  name: LABELED_DEPENDENCIES
53
  type: token-classification
54
  metrics:
55
  - name: Labeled Dependencies Accuracy
56
  type: accuracy
57
+ value: 0.7683558244
58
  ---
59
  ### Details: https://spacy.io/models/zh#zh_core_web_trf
60
 
 
63
  | Feature | Description |
64
  | --- | --- |
65
  | **Name** | `zh_core_web_trf` |
66
+ | **Version** | `3.2.0` |
67
+ | **spaCy** | `>=3.2.0,<3.3.0` |
68
  | **Default Pipeline** | `transformer`, `tagger`, `parser`, `attribute_ruler`, `ner` |
69
  | **Components** | `transformer`, `tagger`, `parser`, `attribute_ruler`, `ner` |
70
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
91
  | Type | Score |
92
  | --- | --- |
93
  | `TOKEN_ACC` | 97.88 |
94
+ | `TOKEN_P` | 94.58 |
95
+ | `TOKEN_R` | 91.36 |
96
+ | `TOKEN_F` | 92.94 |
97
+ | `TAG_ACC` | 92.47 |
98
+ | `SENTS_P` | 71.11 |
99
+ | `SENTS_R` | 63.71 |
100
+ | `SENTS_F` | 67.21 |
101
+ | `DEP_UAS` | 76.84 |
102
+ | `DEP_LAS` | 73.07 |
103
+ | `ENTS_P` | 74.64 |
104
+ | `ENTS_R` | 76.15 |
105
+ | `ENTS_F` | 75.39 |
accuracy.json CHANGED
@@ -1,40 +1,39 @@
1
  {
2
  "token_acc": 0.9788303388,
3
- "tag_acc": 0.92444533,
4
- "dep_uas": 0.7661671924,
5
- "dep_las": 0.727981345,
6
- "ents_p": 0.6744976507,
7
- "ents_r": 0.7414285714,
8
- "ents_f": 0.7063811967,
9
- "sents_p": 0.6917940608,
10
- "sents_r": 0.655402031,
11
- "sents_f": 0.6731065139,
12
- "speed": 4304.7686585922,
13
  "dep_las_per_type": {
14
  "dep": {
15
- "p": 0.5611908839,
16
- "r": 0.4304334647,
17
- "f": 0.4871912168
18
  },
19
  "case": {
20
- "p": 0.9069435432,
21
- "r": 0.8472356935,
22
- "f": 0.8760734658
23
  },
24
  "nmod:tmod": {
25
- "p": 0.8046448087,
26
- "r": 0.8013605442,
27
- "f": 0.8029993183
28
  },
29
  "nummod": {
30
- "p": 0.9012345679,
31
- "r": 0.583610926,
32
- "f": 0.7084512738
33
  },
34
  "mark:clf": {
35
- "p": 0.9517326733,
36
  "r": 0.5736665423,
37
- "f": 0.7158482662
38
  },
39
  "auxpass": {
40
  "p": 0.9293478261,
@@ -42,189 +41,189 @@
42
  "f": 0.9268292683
43
  },
44
  "nsubj": {
45
- "p": 0.8733342307,
46
- "r": 0.7969536912,
47
- "f": 0.8333975594
48
  },
49
  "acl": {
50
- "p": 0.8216805645,
51
- "r": 0.7104825291,
52
- "f": 0.762046401
53
  },
54
  "advmod": {
55
- "p": 0.8829383266,
56
- "r": 0.7643391521,
57
- "f": 0.819369342
58
  },
59
  "mark": {
60
- "p": 0.865225391,
61
- "r": 0.82427695,
62
- "f": 0.8442549372
63
  },
64
  "xcomp": {
65
- "p": 0.8221343874,
66
- "r": 0.67752443,
67
- "f": 0.7428571429
68
  },
69
  "nmod:assmod": {
70
- "p": 0.8709787817,
71
- "r": 0.7920946156,
72
- "f": 0.8296658517
73
  },
74
  "det": {
75
- "p": 0.9033306255,
76
- "r": 0.6514352665,
77
- "f": 0.7569775357
78
  },
79
  "amod": {
80
- "p": 0.8509749304,
81
- "r": 0.7199528672,
82
- "f": 0.78
83
  },
84
  "nmod:prep": {
85
- "p": 0.8192449048,
86
- "r": 0.7416817907,
87
- "f": 0.7785362756
88
  },
89
  "root": {
90
- "p": 0.7726093403,
91
- "r": 0.6940236391,
92
- "f": 0.7312110848
93
  },
94
  "aux:prtmod": {
95
- "p": 0.9042145594,
96
- "r": 0.8428571429,
97
- "f": 0.8724584104
98
  },
99
  "compound:nn": {
100
- "p": 0.8040945994,
101
- "r": 0.7708967851,
102
- "f": 0.7871458189
103
  },
104
  "dobj": {
105
- "p": 0.9081300813,
106
- "r": 0.8272848467,
107
- "f": 0.8658243547
108
  },
109
  "ccomp": {
110
- "p": 0.7877000842,
111
- "r": 0.7270606532,
112
- "f": 0.7561665993
113
  },
114
  "advmod:rcomp": {
115
- "p": 0.8475609756,
116
- "r": 0.7700831025,
117
- "f": 0.8069666183
118
  },
119
  "nmod:topic": {
120
- "p": 0.5434782609,
121
- "r": 0.487012987,
122
- "f": 0.5136986301
123
  },
124
  "cop": {
125
- "p": 0.8351555929,
126
- "r": 0.638996139,
127
- "f": 0.7240247904
128
  },
129
  "discourse": {
130
- "p": 0.6153846154,
131
- "r": 0.5478547855,
132
- "f": 0.5796595373
133
  },
134
  "neg": {
135
- "p": 0.8932496075,
136
- "r": 0.6765755054,
137
- "f": 0.7699594046
138
  },
139
  "aux:modal": {
140
- "p": 0.9019823789,
141
- "r": 0.8469493278,
142
- "f": 0.8736
143
  },
144
  "nmod": {
145
- "p": 0.7988505747,
146
- "r": 0.7544097693,
147
- "f": 0.7759944173
148
  },
149
  "aux:ba": {
150
- "p": 0.9265536723,
151
- "r": 0.8723404255,
152
- "f": 0.898630137
153
  },
154
  "advmod:loc": {
155
- "p": 0.80859375,
156
- "r": 0.6142433234,
157
- "f": 0.6981450253
158
  },
159
  "aux:asp": {
160
- "p": 0.9320882852,
161
- "r": 0.8755980861,
162
- "f": 0.9029605263
163
  },
164
  "conj": {
165
- "p": 0.6313854489,
166
- "r": 0.6168241966,
167
- "f": 0.6240198891
168
  },
169
  "nsubjpass": {
170
- "p": 0.8913043478,
171
- "r": 0.82,
172
- "f": 0.8541666667
173
  },
174
  "compound:vc": {
175
- "p": 0.5459183673,
176
- "r": 0.5544041451,
177
- "f": 0.5501285347
178
  },
179
  "advcl:loc": {
180
- "p": 0.7586206897,
181
- "r": 0.6285714286,
182
- "f": 0.6875
183
  },
184
  "cc": {
185
- "p": 0.7972477064,
186
- "r": 0.7710736469,
187
- "f": 0.7839422643
188
  },
189
  "advmod:dvp": {
190
- "p": 0.9076923077,
191
- "r": 0.7329192547,
192
- "f": 0.8109965636
193
  },
194
  "amod:ordmod": {
195
- "p": 0.6666666667,
196
- "r": 0.59375,
197
- "f": 0.6280991736
198
  },
199
  "appos": {
200
- "p": 0.9434889435,
201
- "r": 0.8827586207,
202
- "f": 0.9121140143
203
  },
204
  "nmod:poss": {
205
- "p": 0.7647058824,
206
- "r": 0.6740740741,
207
- "f": 0.7165354331
208
  },
209
  "name": {
210
- "p": 0.6097560976,
211
- "r": 0.5555555556,
212
- "f": 0.5813953488
213
  },
214
  "nsubj:xsubj": {
215
- "p": 0.5,
216
  "r": 0.1,
217
- "f": 0.1666666667
218
  },
219
  "nmod:range": {
220
- "p": 0.8295454545,
221
- "r": 0.7348993289,
222
- "f": 0.7793594306
223
  },
224
  "parataxis:prnmod": {
225
- "p": 0.3663366337,
226
- "r": 0.2781954887,
227
- "f": 0.3162393162
228
  },
229
  "erased": {
230
  "p": 0.0,
@@ -232,101 +231,105 @@
232
  "f": 0.0
233
  },
234
  "etc": {
235
- "p": 0.9285714286,
236
  "r": 0.9285714286,
237
- "f": 0.9285714286
238
  }
239
  },
 
 
 
240
  "ents_per_type": {
241
  "DATE": {
242
- "p": 0.6931530008,
243
- "r": 0.8126858276,
244
- "f": 0.7481751825
245
  },
246
  "GPE": {
247
- "p": 0.792633015,
248
- "r": 0.8519061584,
249
- "f": 0.8212014134
250
- },
251
- "CARDINAL": {
252
- "p": 0.5527502254,
253
- "r": 0.6179435484,
254
- "f": 0.5835316516
255
  },
256
  "ORDINAL": {
257
- "p": 0.8287292818,
258
  "r": 0.7894736842,
259
- "f": 0.8086253369
260
  },
261
  "FAC": {
262
- "p": 0.5301204819,
263
- "r": 0.4731182796,
264
- "f": 0.5
265
  },
266
  "ORG": {
267
- "p": 0.7304479879,
268
- "r": 0.7321156773,
269
- "f": 0.7312808818
270
- },
271
- "LOC": {
272
- "p": 0.1899383984,
273
- "r": 0.497311828,
274
- "f": 0.2748885587
275
  },
276
  "NORP": {
277
- "p": 0.6797900262,
278
- "r": 0.5441176471,
279
- "f": 0.6044340723
 
 
 
 
 
280
  },
281
  "QUANTITY": {
282
- "p": 0.7363636364,
283
- "r": 0.6,
284
- "f": 0.6612244898
285
  },
286
  "PERSON": {
287
- "p": 0.8649842271,
288
- "r": 0.8833762887,
289
- "f": 0.8740835193
 
 
 
 
 
290
  },
291
  "TIME": {
292
- "p": 0.711627907,
293
- "r": 0.7427184466,
294
- "f": 0.7268408551
295
  },
296
  "WORK_OF_ART": {
297
- "p": 0.1849315068,
298
- "r": 0.18,
299
- "f": 0.1824324324
300
  },
301
  "MONEY": {
302
- "p": 0.8682170543,
303
  "r": 0.8296296296,
304
- "f": 0.8484848485
305
  },
306
  "EVENT": {
307
- "p": 0.5804195804,
308
- "r": 0.6102941176,
309
- "f": 0.5949820789
310
  },
311
  "PERCENT": {
312
- "p": 0.7640449438,
313
- "r": 0.8192771084,
314
- "f": 0.7906976744
315
  },
316
  "PRODUCT": {
317
- "p": 0.5384615385,
318
- "r": 0.1428571429,
319
- "f": 0.2258064516
320
  },
321
  "LAW": {
322
- "p": 0.3076923077,
323
- "r": 0.2666666667,
324
- "f": 0.2857142857
325
  },
326
  "LANGUAGE": {
327
- "p": 0.8181818182,
328
  "r": 1.0,
329
- "f": 0.9
330
  }
331
- }
 
332
  }
 
1
  {
2
  "token_acc": 0.9788303388,
3
+ "token_p": 0.9458325855,
4
+ "token_r": 0.9136060443,
5
+ "token_f": 0.9294400505,
6
+ "tag_acc": 0.9247167985,
7
+ "sents_p": 0.7110739502,
8
+ "sents_r": 0.6370900616,
9
+ "sents_f": 0.67205198,
10
+ "dep_uas": 0.7683558244,
11
+ "dep_las": 0.7307018312,
 
12
  "dep_las_per_type": {
13
  "dep": {
14
+ "p": 0.5656512605,
15
+ "r": 0.4352834192,
16
+ "f": 0.4919773882
17
  },
18
  "case": {
19
+ "p": 0.9131065401,
20
+ "r": 0.8395974782,
21
+ "f": 0.8748105104
22
  },
23
  "nmod:tmod": {
24
+ "p": 0.8136054422,
25
+ "r": 0.8136054422,
26
+ "f": 0.8136054422
27
  },
28
  "nummod": {
29
+ "p": 0.9011213048,
30
+ "r": 0.5889407062,
31
+ "f": 0.7123287671
32
  },
33
  "mark:clf": {
34
+ "p": 0.9499691167,
35
  "r": 0.5736665423,
36
+ "f": 0.7153488372
37
  },
38
  "auxpass": {
39
  "p": 0.9293478261,
 
41
  "f": 0.9268292683
42
  },
43
  "nsubj": {
44
+ "p": 0.8800107614,
45
+ "r": 0.803586783,
46
+ "f": 0.8400642055
47
  },
48
  "acl": {
49
+ "p": 0.8209331652,
50
+ "r": 0.7221297837,
51
+ "f": 0.7683682502
52
  },
53
  "advmod": {
54
+ "p": 0.8834787137,
55
+ "r": 0.7692133303,
56
+ "f": 0.822395928
57
  },
58
  "mark": {
59
+ "p": 0.8492417484,
60
+ "r": 0.8343558282,
61
+ "f": 0.8417329797
62
  },
63
  "xcomp": {
64
+ "p": 0.8368522073,
65
+ "r": 0.7100977199,
66
+ "f": 0.7682819383
67
  },
68
  "nmod:assmod": {
69
+ "p": 0.8744897959,
70
+ "r": 0.8001867414,
71
+ "f": 0.8356899074
72
  },
73
  "det": {
74
+ "p": 0.8905872888,
75
+ "r": 0.6485061511,
76
+ "f": 0.7505084746
77
  },
78
  "amod": {
79
+ "p": 0.8427987279,
80
+ "r": 0.7285938727,
81
+ "f": 0.7815462397
82
  },
83
  "nmod:prep": {
84
+ "p": 0.8286297178,
85
+ "r": 0.7371445856,
86
+ "f": 0.780214503
87
  },
88
  "root": {
89
+ "p": 0.792271479,
90
+ "r": 0.6723822207,
91
+ "f": 0.727420081
92
  },
93
  "aux:prtmod": {
94
+ "p": 0.9251968504,
95
+ "r": 0.8392857143,
96
+ "f": 0.8801498127
97
  },
98
  "compound:nn": {
99
+ "p": 0.8133166279,
100
+ "r": 0.7688663283,
101
+ "f": 0.7904670784
102
  },
103
  "dobj": {
104
+ "p": 0.9164218087,
105
+ "r": 0.8315805066,
106
+ "f": 0.8719422226
107
  },
108
  "ccomp": {
109
+ "p": 0.7944862155,
110
+ "r": 0.7395023328,
111
+ "f": 0.7660088602
112
  },
113
  "advmod:rcomp": {
114
+ "p": 0.8541033435,
115
+ "r": 0.7783933518,
116
+ "f": 0.8144927536
117
  },
118
  "nmod:topic": {
119
+ "p": 0.5104166667,
120
+ "r": 0.4772727273,
121
+ "f": 0.4932885906
122
  },
123
  "cop": {
124
+ "p": 0.8533218292,
125
+ "r": 0.6364221364,
126
+ "f": 0.7290821968
127
  },
128
  "discourse": {
129
+ "p": 0.6257833483,
130
+ "r": 0.5767326733,
131
+ "f": 0.6002576213
132
  },
133
  "neg": {
134
+ "p": 0.898089172,
135
+ "r": 0.6706302021,
136
+ "f": 0.7678692988
137
  },
138
  "aux:modal": {
139
+ "p": 0.9086908691,
140
+ "r": 0.854188211,
141
+ "f": 0.8805970149
142
  },
143
  "nmod": {
144
+ "p": 0.8080808081,
145
+ "r": 0.7598371777,
146
+ "f": 0.7832167832
147
  },
148
  "aux:ba": {
149
+ "p": 0.9485714286,
150
+ "r": 0.8829787234,
151
+ "f": 0.914600551
152
  },
153
  "advmod:loc": {
154
+ "p": 0.803030303,
155
+ "r": 0.6290801187,
156
+ "f": 0.7054908486
157
  },
158
  "aux:asp": {
159
+ "p": 0.9346349745,
160
+ "r": 0.8779904306,
161
+ "f": 0.9054276316
162
  },
163
  "conj": {
164
+ "p": 0.6249059443,
165
+ "r": 0.6279773157,
166
+ "f": 0.6264378654
167
  },
168
  "nsubjpass": {
169
+ "p": 0.9302325581,
170
+ "r": 0.8,
171
+ "f": 0.8602150538
172
  },
173
  "compound:vc": {
174
+ "p": 0.5380952381,
175
+ "r": 0.585492228,
176
+ "f": 0.5607940447
177
  },
178
  "advcl:loc": {
179
+ "p": 0.7894736842,
180
+ "r": 0.6428571429,
181
+ "f": 0.7086614173
182
  },
183
  "cc": {
184
+ "p": 0.8191287879,
185
+ "r": 0.7675244011,
186
+ "f": 0.7924874027
187
  },
188
  "advmod:dvp": {
189
+ "p": 0.8731343284,
190
+ "r": 0.7267080745,
191
+ "f": 0.793220339
192
  },
193
  "amod:ordmod": {
194
+ "p": 0.640625,
195
+ "r": 0.640625,
196
+ "f": 0.640625
197
  },
198
  "appos": {
199
+ "p": 0.9443099274,
200
+ "r": 0.8965517241,
201
+ "f": 0.9198113208
202
  },
203
  "nmod:poss": {
204
+ "p": 0.796460177,
205
+ "r": 0.6666666667,
206
+ "f": 0.7258064516
207
  },
208
  "name": {
209
+ "p": 0.6635514019,
210
+ "r": 0.5259259259,
211
+ "f": 0.5867768595
212
  },
213
  "nsubj:xsubj": {
214
+ "p": 0.25,
215
  "r": 0.1,
216
+ "f": 0.1428571429
217
  },
218
  "nmod:range": {
219
+ "p": 0.8365019011,
220
+ "r": 0.7382550336,
221
+ "f": 0.7843137255
222
  },
223
  "parataxis:prnmod": {
224
+ "p": 0.3645833333,
225
+ "r": 0.2631578947,
226
+ "f": 0.3056768559
227
  },
228
  "erased": {
229
  "p": 0.0,
 
231
  "f": 0.0
232
  },
233
  "etc": {
234
+ "p": 0.8965517241,
235
  "r": 0.9285714286,
236
+ "f": 0.9122807018
237
  }
238
  },
239
+ "ents_p": 0.746365105,
240
+ "ents_r": 0.7615384615,
241
+ "ents_f": 0.7538754419,
242
  "ents_per_type": {
243
  "DATE": {
244
+ "p": 0.7673357664,
245
+ "r": 0.8334985134,
246
+ "f": 0.7990498812
247
  },
248
  "GPE": {
249
+ "p": 0.7898259705,
250
+ "r": 0.8651026393,
251
+ "f": 0.8257522743
 
 
 
 
 
252
  },
253
  "ORDINAL": {
254
+ "p": 0.8720930233,
255
  "r": 0.7894736842,
256
+ "f": 0.8287292818
257
  },
258
  "FAC": {
259
+ "p": 0.4685990338,
260
+ "r": 0.5215053763,
261
+ "f": 0.4936386768
262
  },
263
  "ORG": {
264
+ "p": 0.749034749,
265
+ "r": 0.7382039574,
266
+ "f": 0.7435799157
 
 
 
 
 
267
  },
268
  "NORP": {
269
+ "p": 0.6806930693,
270
+ "r": 0.5777310924,
271
+ "f": 0.625
272
+ },
273
+ "LOC": {
274
+ "p": 0.5765765766,
275
+ "r": 0.5161290323,
276
+ "f": 0.5446808511
277
  },
278
  "QUANTITY": {
279
+ "p": 0.6742424242,
280
+ "r": 0.6592592593,
281
+ "f": 0.6666666667
282
  },
283
  "PERSON": {
284
+ "p": 0.8638871858,
285
+ "r": 0.9078608247,
286
+ "f": 0.8853283066
287
+ },
288
+ "CARDINAL": {
289
+ "p": 0.6205821206,
290
+ "r": 0.6018145161,
291
+ "f": 0.6110542477
292
  },
293
  "TIME": {
294
+ "p": 0.7630331754,
295
+ "r": 0.7815533981,
296
+ "f": 0.7721822542
297
  },
298
  "WORK_OF_ART": {
299
+ "p": 0.4724409449,
300
+ "r": 0.4,
301
+ "f": 0.4332129964
302
  },
303
  "MONEY": {
304
+ "p": 0.8615384615,
305
  "r": 0.8296296296,
306
+ "f": 0.8452830189
307
  },
308
  "EVENT": {
309
+ "p": 0.5757575758,
310
+ "r": 0.6985294118,
311
+ "f": 0.6312292359
312
  },
313
  "PERCENT": {
314
+ "p": 0.7865168539,
315
+ "r": 0.843373494,
316
+ "f": 0.8139534884
317
  },
318
  "PRODUCT": {
319
+ "p": 0.4814814815,
320
+ "r": 0.2653061224,
321
+ "f": 0.3421052632
322
  },
323
  "LAW": {
324
+ "p": 0.3921568627,
325
+ "r": 0.3333333333,
326
+ "f": 0.3603603604
327
  },
328
  "LANGUAGE": {
329
+ "p": 0.75,
330
  "r": 1.0,
331
+ "f": 0.8571428571
332
  }
333
+ },
334
+ "speed": 4253.4050367787
335
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -1,10 +1,8 @@
1
  [paths]
2
- train = "corpus/zh-core-news/train.spacy"
3
- dev = "corpus/zh-core-news/dev.spacy"
4
  vectors = null
5
- raw = null
6
  init_tok2vec = null
7
- vocab_data = null
8
 
9
  [system]
10
  gpu_allocator = "pytorch"
@@ -27,12 +25,14 @@ segmenter = "pkuseg"
27
 
28
  [components.attribute_ruler]
29
  factory = "attribute_ruler"
 
30
  validate = false
31
 
32
  [components.ner]
33
  factory = "ner"
34
  incorrect_spans_key = null
35
  moves = null
 
36
  update_with_oracle_cut_size = 100
37
 
38
  [components.ner.model]
@@ -55,6 +55,7 @@ factory = "parser"
55
  learn_tokens = false
56
  min_action_freq = 30
57
  moves = null
 
58
  update_with_oracle_cut_size = 100
59
 
60
  [components.parser.model]
@@ -74,6 +75,8 @@ pooling = {"@layers":"reduce_mean.v1"}
74
 
75
  [components.tagger]
76
  factory = "tagger"
 
 
77
 
78
  [components.tagger.model]
79
  @architectures = "spacy.Tagger.v1"
@@ -91,32 +94,37 @@ max_batch_items = 4096
91
  set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
92
 
93
  [components.transformer.model]
94
- @architectures = "spacy-transformers.TransformerModel.v1"
95
  name = "bert-base-chinese"
 
96
 
97
  [components.transformer.model.get_spans]
98
  @span_getters = "spacy-transformers.strided_spans.v1"
99
  window = 128
100
  stride = 96
101
 
 
 
102
  [components.transformer.model.tokenizer_config]
103
  use_fast = true
104
 
 
 
105
  [corpora]
106
 
107
  [corpora.dev]
108
  @readers = "spacy.Corpus.v1"
109
- limit = 0
110
- max_length = 0
111
- path = ${paths:dev}
112
  gold_preproc = false
 
 
113
  augmenter = null
114
 
115
  [corpora.train]
116
  @readers = "spacy.Corpus.v1"
117
- path = ${paths:train}
118
- max_length = 500
119
  gold_preproc = false
 
120
  limit = 0
121
  augmenter = null
122
 
@@ -174,11 +182,12 @@ ents_f = 0.32
174
  ents_p = 0.0
175
  ents_r = 0.0
176
  ents_per_type = null
 
177
 
178
  [pretraining]
179
 
180
  [initialize]
181
- vocab_data = ${paths.vocab_data}
182
  vectors = ${paths.vectors}
183
  init_tok2vec = ${paths.init_tok2vec}
184
  before_init = null
 
1
  [paths]
2
+ train = null
3
+ dev = null
4
  vectors = null
 
5
  init_tok2vec = null
 
6
 
7
  [system]
8
  gpu_allocator = "pytorch"
 
25
 
26
  [components.attribute_ruler]
27
  factory = "attribute_ruler"
28
+ scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
29
  validate = false
30
 
31
  [components.ner]
32
  factory = "ner"
33
  incorrect_spans_key = null
34
  moves = null
35
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
36
  update_with_oracle_cut_size = 100
37
 
38
  [components.ner.model]
 
55
  learn_tokens = false
56
  min_action_freq = 30
57
  moves = null
58
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
59
  update_with_oracle_cut_size = 100
60
 
61
  [components.parser.model]
 
75
 
76
  [components.tagger]
77
  factory = "tagger"
78
+ overwrite = false
79
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
80
 
81
  [components.tagger.model]
82
  @architectures = "spacy.Tagger.v1"
 
94
  set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
95
 
96
  [components.transformer.model]
97
+ @architectures = "spacy-transformers.TransformerModel.v3"
98
  name = "bert-base-chinese"
99
+ mixed_precision = false
100
 
101
  [components.transformer.model.get_spans]
102
  @span_getters = "spacy-transformers.strided_spans.v1"
103
  window = 128
104
  stride = 96
105
 
106
+ [components.transformer.model.grad_scaler_config]
107
+
108
  [components.transformer.model.tokenizer_config]
109
  use_fast = true
110
 
111
+ [components.transformer.model.transformer_config]
112
+
113
  [corpora]
114
 
115
  [corpora.dev]
116
  @readers = "spacy.Corpus.v1"
117
+ path = ${paths.dev}
 
 
118
  gold_preproc = false
119
+ max_length = 0
120
+ limit = 0
121
  augmenter = null
122
 
123
  [corpora.train]
124
  @readers = "spacy.Corpus.v1"
125
+ path = ${paths.train}
 
126
  gold_preproc = false
127
+ max_length = 0
128
  limit = 0
129
  augmenter = null
130
 
 
182
  ents_p = 0.0
183
  ents_r = 0.0
184
  ents_per_type = null
185
+ speed = 0.0
186
 
187
  [pretraining]
188
 
189
  [initialize]
190
+ vocab_data = null
191
  vectors = ${paths.vectors}
192
  init_tok2vec = ${paths.init_tok2vec}
193
  before_init = null
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"zh",
3
  "name":"core_web_trf",
4
- "version":"3.1.0",
5
  "description":"Chinese transformer pipeline (bert-base-chinese). Components: transformer, tagger, parser, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
- "spacy_version":">=3.1.0,<3.2.0",
11
- "spacy_git_version":"caba63b74",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -147,41 +147,40 @@
147
  ],
148
  "performance":{
149
  "token_acc":0.9788303388,
150
- "tag_acc":0.92444533,
151
- "dep_uas":0.7661671924,
152
- "dep_las":0.727981345,
153
- "ents_p":0.6744976507,
154
- "ents_r":0.7414285714,
155
- "ents_f":0.7063811967,
156
- "sents_p":0.6917940608,
157
- "sents_r":0.655402031,
158
- "sents_f":0.6731065139,
159
- "speed":4304.7686585922,
160
  "dep_las_per_type":{
161
  "dep":{
162
- "p":0.5611908839,
163
- "r":0.4304334647,
164
- "f":0.4871912168
165
  },
166
  "case":{
167
- "p":0.9069435432,
168
- "r":0.8472356935,
169
- "f":0.8760734658
170
  },
171
  "nmod:tmod":{
172
- "p":0.8046448087,
173
- "r":0.8013605442,
174
- "f":0.8029993183
175
  },
176
  "nummod":{
177
- "p":0.9012345679,
178
- "r":0.583610926,
179
- "f":0.7084512738
180
  },
181
  "mark:clf":{
182
- "p":0.9517326733,
183
  "r":0.5736665423,
184
- "f":0.7158482662
185
  },
186
  "auxpass":{
187
  "p":0.9293478261,
@@ -189,189 +188,189 @@
189
  "f":0.9268292683
190
  },
191
  "nsubj":{
192
- "p":0.8733342307,
193
- "r":0.7969536912,
194
- "f":0.8333975594
195
  },
196
  "acl":{
197
- "p":0.8216805645,
198
- "r":0.7104825291,
199
- "f":0.762046401
200
  },
201
  "advmod":{
202
- "p":0.8829383266,
203
- "r":0.7643391521,
204
- "f":0.819369342
205
  },
206
  "mark":{
207
- "p":0.865225391,
208
- "r":0.82427695,
209
- "f":0.8442549372
210
  },
211
  "xcomp":{
212
- "p":0.8221343874,
213
- "r":0.67752443,
214
- "f":0.7428571429
215
  },
216
  "nmod:assmod":{
217
- "p":0.8709787817,
218
- "r":0.7920946156,
219
- "f":0.8296658517
220
  },
221
  "det":{
222
- "p":0.9033306255,
223
- "r":0.6514352665,
224
- "f":0.7569775357
225
  },
226
  "amod":{
227
- "p":0.8509749304,
228
- "r":0.7199528672,
229
- "f":0.78
230
  },
231
  "nmod:prep":{
232
- "p":0.8192449048,
233
- "r":0.7416817907,
234
- "f":0.7785362756
235
  },
236
  "root":{
237
- "p":0.7726093403,
238
- "r":0.6940236391,
239
- "f":0.7312110848
240
  },
241
  "aux:prtmod":{
242
- "p":0.9042145594,
243
- "r":0.8428571429,
244
- "f":0.8724584104
245
  },
246
  "compound:nn":{
247
- "p":0.8040945994,
248
- "r":0.7708967851,
249
- "f":0.7871458189
250
  },
251
  "dobj":{
252
- "p":0.9081300813,
253
- "r":0.8272848467,
254
- "f":0.8658243547
255
  },
256
  "ccomp":{
257
- "p":0.7877000842,
258
- "r":0.7270606532,
259
- "f":0.7561665993
260
  },
261
  "advmod:rcomp":{
262
- "p":0.8475609756,
263
- "r":0.7700831025,
264
- "f":0.8069666183
265
  },
266
  "nmod:topic":{
267
- "p":0.5434782609,
268
- "r":0.487012987,
269
- "f":0.5136986301
270
  },
271
  "cop":{
272
- "p":0.8351555929,
273
- "r":0.638996139,
274
- "f":0.7240247904
275
  },
276
  "discourse":{
277
- "p":0.6153846154,
278
- "r":0.5478547855,
279
- "f":0.5796595373
280
  },
281
  "neg":{
282
- "p":0.8932496075,
283
- "r":0.6765755054,
284
- "f":0.7699594046
285
  },
286
  "aux:modal":{
287
- "p":0.9019823789,
288
- "r":0.8469493278,
289
- "f":0.8736
290
  },
291
  "nmod":{
292
- "p":0.7988505747,
293
- "r":0.7544097693,
294
- "f":0.7759944173
295
  },
296
  "aux:ba":{
297
- "p":0.9265536723,
298
- "r":0.8723404255,
299
- "f":0.898630137
300
  },
301
  "advmod:loc":{
302
- "p":0.80859375,
303
- "r":0.6142433234,
304
- "f":0.6981450253
305
  },
306
  "aux:asp":{
307
- "p":0.9320882852,
308
- "r":0.8755980861,
309
- "f":0.9029605263
310
  },
311
  "conj":{
312
- "p":0.6313854489,
313
- "r":0.6168241966,
314
- "f":0.6240198891
315
  },
316
  "nsubjpass":{
317
- "p":0.8913043478,
318
- "r":0.82,
319
- "f":0.8541666667
320
  },
321
  "compound:vc":{
322
- "p":0.5459183673,
323
- "r":0.5544041451,
324
- "f":0.5501285347
325
  },
326
  "advcl:loc":{
327
- "p":0.7586206897,
328
- "r":0.6285714286,
329
- "f":0.6875
330
  },
331
  "cc":{
332
- "p":0.7972477064,
333
- "r":0.7710736469,
334
- "f":0.7839422643
335
  },
336
  "advmod:dvp":{
337
- "p":0.9076923077,
338
- "r":0.7329192547,
339
- "f":0.8109965636
340
  },
341
  "amod:ordmod":{
342
- "p":0.6666666667,
343
- "r":0.59375,
344
- "f":0.6280991736
345
  },
346
  "appos":{
347
- "p":0.9434889435,
348
- "r":0.8827586207,
349
- "f":0.9121140143
350
  },
351
  "nmod:poss":{
352
- "p":0.7647058824,
353
- "r":0.6740740741,
354
- "f":0.7165354331
355
  },
356
  "name":{
357
- "p":0.6097560976,
358
- "r":0.5555555556,
359
- "f":0.5813953488
360
  },
361
  "nsubj:xsubj":{
362
- "p":0.5,
363
  "r":0.1,
364
- "f":0.1666666667
365
  },
366
  "nmod:range":{
367
- "p":0.8295454545,
368
- "r":0.7348993289,
369
- "f":0.7793594306
370
  },
371
  "parataxis:prnmod":{
372
- "p":0.3663366337,
373
- "r":0.2781954887,
374
- "f":0.3162393162
375
  },
376
  "erased":{
377
  "p":0.0,
@@ -379,103 +378,107 @@
379
  "f":0.0
380
  },
381
  "etc":{
382
- "p":0.9285714286,
383
  "r":0.9285714286,
384
- "f":0.9285714286
385
  }
386
  },
 
 
 
387
  "ents_per_type":{
388
  "DATE":{
389
- "p":0.6931530008,
390
- "r":0.8126858276,
391
- "f":0.7481751825
392
  },
393
  "GPE":{
394
- "p":0.792633015,
395
- "r":0.8519061584,
396
- "f":0.8212014134
397
- },
398
- "CARDINAL":{
399
- "p":0.5527502254,
400
- "r":0.6179435484,
401
- "f":0.5835316516
402
  },
403
  "ORDINAL":{
404
- "p":0.8287292818,
405
  "r":0.7894736842,
406
- "f":0.8086253369
407
  },
408
  "FAC":{
409
- "p":0.5301204819,
410
- "r":0.4731182796,
411
- "f":0.5
412
  },
413
  "ORG":{
414
- "p":0.7304479879,
415
- "r":0.7321156773,
416
- "f":0.7312808818
417
- },
418
- "LOC":{
419
- "p":0.1899383984,
420
- "r":0.497311828,
421
- "f":0.2748885587
422
  },
423
  "NORP":{
424
- "p":0.6797900262,
425
- "r":0.5441176471,
426
- "f":0.6044340723
 
 
 
 
 
427
  },
428
  "QUANTITY":{
429
- "p":0.7363636364,
430
- "r":0.6,
431
- "f":0.6612244898
432
  },
433
  "PERSON":{
434
- "p":0.8649842271,
435
- "r":0.8833762887,
436
- "f":0.8740835193
 
 
 
 
 
437
  },
438
  "TIME":{
439
- "p":0.711627907,
440
- "r":0.7427184466,
441
- "f":0.7268408551
442
  },
443
  "WORK_OF_ART":{
444
- "p":0.1849315068,
445
- "r":0.18,
446
- "f":0.1824324324
447
  },
448
  "MONEY":{
449
- "p":0.8682170543,
450
  "r":0.8296296296,
451
- "f":0.8484848485
452
  },
453
  "EVENT":{
454
- "p":0.5804195804,
455
- "r":0.6102941176,
456
- "f":0.5949820789
457
  },
458
  "PERCENT":{
459
- "p":0.7640449438,
460
- "r":0.8192771084,
461
- "f":0.7906976744
462
  },
463
  "PRODUCT":{
464
- "p":0.5384615385,
465
- "r":0.1428571429,
466
- "f":0.2258064516
467
  },
468
  "LAW":{
469
- "p":0.3076923077,
470
- "r":0.2666666667,
471
- "f":0.2857142857
472
  },
473
  "LANGUAGE":{
474
- "p":0.8181818182,
475
  "r":1.0,
476
- "f":0.9
477
  }
478
- }
 
479
  },
480
  "sources":[
481
  {
@@ -498,7 +501,7 @@
498
  }
499
  ],
500
  "requirements":[
501
- "spacy-transformers>=1.0.3,<1.1.0",
502
  "spacy-pkuseg>=0.0.27,<0.1.0"
503
  ]
504
  }
 
1
  {
2
  "lang":"zh",
3
  "name":"core_web_trf",
4
+ "version":"3.2.0",
5
  "description":"Chinese transformer pipeline (bert-base-chinese). Components: transformer, tagger, parser, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
+ "spacy_version":">=3.2.0,<3.3.0",
11
+ "spacy_git_version":"bb26550e2",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
147
  ],
148
  "performance":{
149
  "token_acc":0.9788303388,
150
+ "token_p":0.9458325855,
151
+ "token_r":0.9136060443,
152
+ "token_f":0.9294400505,
153
+ "tag_acc":0.9247167985,
154
+ "sents_p":0.7110739502,
155
+ "sents_r":0.6370900616,
156
+ "sents_f":0.67205198,
157
+ "dep_uas":0.7683558244,
158
+ "dep_las":0.7307018312,
 
159
  "dep_las_per_type":{
160
  "dep":{
161
+ "p":0.5656512605,
162
+ "r":0.4352834192,
163
+ "f":0.4919773882
164
  },
165
  "case":{
166
+ "p":0.9131065401,
167
+ "r":0.8395974782,
168
+ "f":0.8748105104
169
  },
170
  "nmod:tmod":{
171
+ "p":0.8136054422,
172
+ "r":0.8136054422,
173
+ "f":0.8136054422
174
  },
175
  "nummod":{
176
+ "p":0.9011213048,
177
+ "r":0.5889407062,
178
+ "f":0.7123287671
179
  },
180
  "mark:clf":{
181
+ "p":0.9499691167,
182
  "r":0.5736665423,
183
+ "f":0.7153488372
184
  },
185
  "auxpass":{
186
  "p":0.9293478261,
 
188
  "f":0.9268292683
189
  },
190
  "nsubj":{
191
+ "p":0.8800107614,
192
+ "r":0.803586783,
193
+ "f":0.8400642055
194
  },
195
  "acl":{
196
+ "p":0.8209331652,
197
+ "r":0.7221297837,
198
+ "f":0.7683682502
199
  },
200
  "advmod":{
201
+ "p":0.8834787137,
202
+ "r":0.7692133303,
203
+ "f":0.822395928
204
  },
205
  "mark":{
206
+ "p":0.8492417484,
207
+ "r":0.8343558282,
208
+ "f":0.8417329797
209
  },
210
  "xcomp":{
211
+ "p":0.8368522073,
212
+ "r":0.7100977199,
213
+ "f":0.7682819383
214
  },
215
  "nmod:assmod":{
216
+ "p":0.8744897959,
217
+ "r":0.8001867414,
218
+ "f":0.8356899074
219
  },
220
  "det":{
221
+ "p":0.8905872888,
222
+ "r":0.6485061511,
223
+ "f":0.7505084746
224
  },
225
  "amod":{
226
+ "p":0.8427987279,
227
+ "r":0.7285938727,
228
+ "f":0.7815462397
229
  },
230
  "nmod:prep":{
231
+ "p":0.8286297178,
232
+ "r":0.7371445856,
233
+ "f":0.780214503
234
  },
235
  "root":{
236
+ "p":0.792271479,
237
+ "r":0.6723822207,
238
+ "f":0.727420081
239
  },
240
  "aux:prtmod":{
241
+ "p":0.9251968504,
242
+ "r":0.8392857143,
243
+ "f":0.8801498127
244
  },
245
  "compound:nn":{
246
+ "p":0.8133166279,
247
+ "r":0.7688663283,
248
+ "f":0.7904670784
249
  },
250
  "dobj":{
251
+ "p":0.9164218087,
252
+ "r":0.8315805066,
253
+ "f":0.8719422226
254
  },
255
  "ccomp":{
256
+ "p":0.7944862155,
257
+ "r":0.7395023328,
258
+ "f":0.7660088602
259
  },
260
  "advmod:rcomp":{
261
+ "p":0.8541033435,
262
+ "r":0.7783933518,
263
+ "f":0.8144927536
264
  },
265
  "nmod:topic":{
266
+ "p":0.5104166667,
267
+ "r":0.4772727273,
268
+ "f":0.4932885906
269
  },
270
  "cop":{
271
+ "p":0.8533218292,
272
+ "r":0.6364221364,
273
+ "f":0.7290821968
274
  },
275
  "discourse":{
276
+ "p":0.6257833483,
277
+ "r":0.5767326733,
278
+ "f":0.6002576213
279
  },
280
  "neg":{
281
+ "p":0.898089172,
282
+ "r":0.6706302021,
283
+ "f":0.7678692988
284
  },
285
  "aux:modal":{
286
+ "p":0.9086908691,
287
+ "r":0.854188211,
288
+ "f":0.8805970149
289
  },
290
  "nmod":{
291
+ "p":0.8080808081,
292
+ "r":0.7598371777,
293
+ "f":0.7832167832
294
  },
295
  "aux:ba":{
296
+ "p":0.9485714286,
297
+ "r":0.8829787234,
298
+ "f":0.914600551
299
  },
300
  "advmod:loc":{
301
+ "p":0.803030303,
302
+ "r":0.6290801187,
303
+ "f":0.7054908486
304
  },
305
  "aux:asp":{
306
+ "p":0.9346349745,
307
+ "r":0.8779904306,
308
+ "f":0.9054276316
309
  },
310
  "conj":{
311
+ "p":0.6249059443,
312
+ "r":0.6279773157,
313
+ "f":0.6264378654
314
  },
315
  "nsubjpass":{
316
+ "p":0.9302325581,
317
+ "r":0.8,
318
+ "f":0.8602150538
319
  },
320
  "compound:vc":{
321
+ "p":0.5380952381,
322
+ "r":0.585492228,
323
+ "f":0.5607940447
324
  },
325
  "advcl:loc":{
326
+ "p":0.7894736842,
327
+ "r":0.6428571429,
328
+ "f":0.7086614173
329
  },
330
  "cc":{
331
+ "p":0.8191287879,
332
+ "r":0.7675244011,
333
+ "f":0.7924874027
334
  },
335
  "advmod:dvp":{
336
+ "p":0.8731343284,
337
+ "r":0.7267080745,
338
+ "f":0.793220339
339
  },
340
  "amod:ordmod":{
341
+ "p":0.640625,
342
+ "r":0.640625,
343
+ "f":0.640625
344
  },
345
  "appos":{
346
+ "p":0.9443099274,
347
+ "r":0.8965517241,
348
+ "f":0.9198113208
349
  },
350
  "nmod:poss":{
351
+ "p":0.796460177,
352
+ "r":0.6666666667,
353
+ "f":0.7258064516
354
  },
355
  "name":{
356
+ "p":0.6635514019,
357
+ "r":0.5259259259,
358
+ "f":0.5867768595
359
  },
360
  "nsubj:xsubj":{
361
+ "p":0.25,
362
  "r":0.1,
363
+ "f":0.1428571429
364
  },
365
  "nmod:range":{
366
+ "p":0.8365019011,
367
+ "r":0.7382550336,
368
+ "f":0.7843137255
369
  },
370
  "parataxis:prnmod":{
371
+ "p":0.3645833333,
372
+ "r":0.2631578947,
373
+ "f":0.3056768559
374
  },
375
  "erased":{
376
  "p":0.0,
 
378
  "f":0.0
379
  },
380
  "etc":{
381
+ "p":0.8965517241,
382
  "r":0.9285714286,
383
+ "f":0.9122807018
384
  }
385
  },
386
+ "ents_p":0.746365105,
387
+ "ents_r":0.7615384615,
388
+ "ents_f":0.7538754419,
389
  "ents_per_type":{
390
  "DATE":{
391
+ "p":0.7673357664,
392
+ "r":0.8334985134,
393
+ "f":0.7990498812
394
  },
395
  "GPE":{
396
+ "p":0.7898259705,
397
+ "r":0.8651026393,
398
+ "f":0.8257522743
 
 
 
 
 
399
  },
400
  "ORDINAL":{
401
+ "p":0.8720930233,
402
  "r":0.7894736842,
403
+ "f":0.8287292818
404
  },
405
  "FAC":{
406
+ "p":0.4685990338,
407
+ "r":0.5215053763,
408
+ "f":0.4936386768
409
  },
410
  "ORG":{
411
+ "p":0.749034749,
412
+ "r":0.7382039574,
413
+ "f":0.7435799157
 
 
 
 
 
414
  },
415
  "NORP":{
416
+ "p":0.6806930693,
417
+ "r":0.5777310924,
418
+ "f":0.625
419
+ },
420
+ "LOC":{
421
+ "p":0.5765765766,
422
+ "r":0.5161290323,
423
+ "f":0.5446808511
424
  },
425
  "QUANTITY":{
426
+ "p":0.6742424242,
427
+ "r":0.6592592593,
428
+ "f":0.6666666667
429
  },
430
  "PERSON":{
431
+ "p":0.8638871858,
432
+ "r":0.9078608247,
433
+ "f":0.8853283066
434
+ },
435
+ "CARDINAL":{
436
+ "p":0.6205821206,
437
+ "r":0.6018145161,
438
+ "f":0.6110542477
439
  },
440
  "TIME":{
441
+ "p":0.7630331754,
442
+ "r":0.7815533981,
443
+ "f":0.7721822542
444
  },
445
  "WORK_OF_ART":{
446
+ "p":0.4724409449,
447
+ "r":0.4,
448
+ "f":0.4332129964
449
  },
450
  "MONEY":{
451
+ "p":0.8615384615,
452
  "r":0.8296296296,
453
+ "f":0.8452830189
454
  },
455
  "EVENT":{
456
+ "p":0.5757575758,
457
+ "r":0.6985294118,
458
+ "f":0.6312292359
459
  },
460
  "PERCENT":{
461
+ "p":0.7865168539,
462
+ "r":0.843373494,
463
+ "f":0.8139534884
464
  },
465
  "PRODUCT":{
466
+ "p":0.4814814815,
467
+ "r":0.2653061224,
468
+ "f":0.3421052632
469
  },
470
  "LAW":{
471
+ "p":0.3921568627,
472
+ "r":0.3333333333,
473
+ "f":0.3603603604
474
  },
475
  "LANGUAGE":{
476
+ "p":0.75,
477
  "r":1.0,
478
+ "f":0.8571428571
479
  }
480
+ },
481
+ "speed":4253.4050367787
482
  },
483
  "sources":[
484
  {
 
501
  }
502
  ],
503
  "requirements":[
504
+ "spacy-transformers>=1.1.2,<1.2.0",
505
  "spacy-pkuseg>=0.0.27,<0.1.0"
506
  ]
507
  }
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
parser/model CHANGED
Binary files a/parser/model and b/parser/model differ
 
tagger/cfg CHANGED
@@ -36,5 +36,6 @@
36
  "VE",
37
  "VV",
38
  "X"
39
- ]
 
40
  }
 
36
  "VE",
37
  "VV",
38
  "X"
39
+ ],
40
+ "overwrite":false
41
  }
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
transformer/{model/pytorch_model.bin β†’ model} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88b011d7e6facc2f530831168b8e50391dc0c6fa262867559a70bc73df8c3294
3
- size 409149169
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9e45b2406388c09a3ab4b2bc81714af5dc666e990b66d03e59e542a9681840
3
+ size 409530061
transformer/model/config.json DELETED
@@ -1,30 +0,0 @@
1
- {
2
- "_name_or_path": "/mnt/scratch/tmp/zh_core_web_trf/d999ed1d-d7a0-4c09-b6e1-c8df4f70f55c/training/core/model-best/transformer/model",
3
- "architectures": [
4
- "BertForMaskedLM"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "directionality": "bidi",
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 3072,
14
- "layer_norm_eps": 1e-12,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "pooler_fc_size": 768,
21
- "pooler_num_attention_heads": 12,
22
- "pooler_num_fc_layers": 3,
23
- "pooler_size_per_head": 128,
24
- "pooler_type": "first_token_transform",
25
- "position_embedding_type": "absolute",
26
- "transformers_version": "4.6.1",
27
- "type_vocab_size": 2,
28
- "use_cache": true,
29
- "vocab_size": 21128
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
transformer/model/special_tokens_map.json DELETED
@@ -1 +0,0 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
 
 
transformer/model/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
transformer/model/tokenizer_config.json DELETED
@@ -1 +0,0 @@
1
- {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/mnt/scratch/tmp/zh_core_web_trf/d999ed1d-d7a0-4c09-b6e1-c8df4f70f55c/training/core/model-best/transformer/model"}
 
 
transformer/model/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }
zh_core_web_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e54306e18ab65a96dcd13dced77de99741ef9a8475f52d81c5f28fdea5c147eb
3
- size 417437795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9007fe7c4c1003f136b044fe00ae6fdd75524578cab3a52e3921c5c0f28ffa40
3
+ size 417438380