peg_re
#1
by
yolay
- opened
- README.md +0 -632
- config.json +1 -1
- pytorch_model.bin +2 -2
README.md
CHANGED
@@ -7,626 +7,14 @@ tags:
|
|
7 |
- feature-extraction
|
8 |
- sentence-similarity
|
9 |
- transformers
|
10 |
-
- mteb
|
11 |
-
model-index:
|
12 |
-
- name: PEG
|
13 |
-
results:
|
14 |
-
- task:
|
15 |
-
type: Reranking
|
16 |
-
dataset:
|
17 |
-
type: C-MTEB/CMedQAv1-reranking
|
18 |
-
name: MTEB CMedQAv1
|
19 |
-
config: default
|
20 |
-
split: test
|
21 |
-
revision: None
|
22 |
-
metrics:
|
23 |
-
- type: map
|
24 |
-
value: 84.09137463267582
|
25 |
-
- type: mrr
|
26 |
-
value: 86.6288888888889
|
27 |
-
- task:
|
28 |
-
type: Reranking
|
29 |
-
dataset:
|
30 |
-
type: C-MTEB/CMedQAv2-reranking
|
31 |
-
name: MTEB CMedQAv2
|
32 |
-
config: default
|
33 |
-
split: test
|
34 |
-
revision: None
|
35 |
-
metrics:
|
36 |
-
- type: map
|
37 |
-
value: 86.55765031914974
|
38 |
-
- type: mrr
|
39 |
-
value: 89.4325396825397
|
40 |
-
- task:
|
41 |
-
type: Retrieval
|
42 |
-
dataset:
|
43 |
-
type: C_MTEB/CmedqaRetrieval
|
44 |
-
name: MTEB CmedqaRetrieval
|
45 |
-
config: default
|
46 |
-
split: dev
|
47 |
-
revision: None
|
48 |
-
metrics:
|
49 |
-
- type: map_at_1
|
50 |
-
value: 26.101000000000003
|
51 |
-
- type: map_at_10
|
52 |
-
value: 38.239000000000004
|
53 |
-
- type: map_at_100
|
54 |
-
value: 40.083
|
55 |
-
- type: map_at_1000
|
56 |
-
value: 40.205
|
57 |
-
- type: map_at_3
|
58 |
-
value: 34.386
|
59 |
-
- type: map_at_5
|
60 |
-
value: 36.425999999999995
|
61 |
-
- type: mrr_at_1
|
62 |
-
value: 39.434999999999995
|
63 |
-
- type: mrr_at_10
|
64 |
-
value: 46.967999999999996
|
65 |
-
- type: mrr_at_100
|
66 |
-
value: 47.946
|
67 |
-
- type: mrr_at_1000
|
68 |
-
value: 47.997
|
69 |
-
- type: mrr_at_3
|
70 |
-
value: 44.803
|
71 |
-
- type: mrr_at_5
|
72 |
-
value: 45.911
|
73 |
-
- type: ndcg_at_1
|
74 |
-
value: 39.434999999999995
|
75 |
-
- type: ndcg_at_10
|
76 |
-
value: 44.416
|
77 |
-
- type: ndcg_at_100
|
78 |
-
value: 51.773
|
79 |
-
- type: ndcg_at_1000
|
80 |
-
value: 53.888000000000005
|
81 |
-
- type: ndcg_at_3
|
82 |
-
value: 39.816
|
83 |
-
- type: ndcg_at_5
|
84 |
-
value: 41.467999999999996
|
85 |
-
- type: precision_at_1
|
86 |
-
value: 39.434999999999995
|
87 |
-
- type: precision_at_10
|
88 |
-
value: 9.786999999999999
|
89 |
-
- type: precision_at_100
|
90 |
-
value: 1.5810000000000002
|
91 |
-
- type: precision_at_1000
|
92 |
-
value: 0.184
|
93 |
-
- type: precision_at_3
|
94 |
-
value: 22.414
|
95 |
-
- type: precision_at_5
|
96 |
-
value: 15.943999999999999
|
97 |
-
- type: recall_at_1
|
98 |
-
value: 26.101000000000003
|
99 |
-
- type: recall_at_10
|
100 |
-
value: 53.82900000000001
|
101 |
-
- type: recall_at_100
|
102 |
-
value: 84.63199999999999
|
103 |
-
- type: recall_at_1000
|
104 |
-
value: 98.782
|
105 |
-
- type: recall_at_3
|
106 |
-
value: 39.585
|
107 |
-
- type: recall_at_5
|
108 |
-
value: 45.141
|
109 |
-
- task:
|
110 |
-
type: Retrieval
|
111 |
-
dataset:
|
112 |
-
type: C_MTEB/CovidRetrieval
|
113 |
-
name: MTEB CovidRetrieval
|
114 |
-
config: default
|
115 |
-
split: dev
|
116 |
-
revision: None
|
117 |
-
metrics:
|
118 |
-
- type: map_at_1
|
119 |
-
value: 70.39
|
120 |
-
- type: map_at_10
|
121 |
-
value: 78.93599999999999
|
122 |
-
- type: map_at_100
|
123 |
-
value: 79.202
|
124 |
-
- type: map_at_1000
|
125 |
-
value: 79.205
|
126 |
-
- type: map_at_3
|
127 |
-
value: 77.538
|
128 |
-
- type: map_at_5
|
129 |
-
value: 78.312
|
130 |
-
- type: mrr_at_1
|
131 |
-
value: 70.706
|
132 |
-
- type: mrr_at_10
|
133 |
-
value: 79.018
|
134 |
-
- type: mrr_at_100
|
135 |
-
value: 79.28399999999999
|
136 |
-
- type: mrr_at_1000
|
137 |
-
value: 79.288
|
138 |
-
- type: mrr_at_3
|
139 |
-
value: 77.713
|
140 |
-
- type: mrr_at_5
|
141 |
-
value: 78.462
|
142 |
-
- type: ndcg_at_1
|
143 |
-
value: 70.601
|
144 |
-
- type: ndcg_at_10
|
145 |
-
value: 82.555
|
146 |
-
- type: ndcg_at_100
|
147 |
-
value: 83.718
|
148 |
-
- type: ndcg_at_1000
|
149 |
-
value: 83.855
|
150 |
-
- type: ndcg_at_3
|
151 |
-
value: 79.779
|
152 |
-
- type: ndcg_at_5
|
153 |
-
value: 81.149
|
154 |
-
- type: precision_at_1
|
155 |
-
value: 70.601
|
156 |
-
- type: precision_at_10
|
157 |
-
value: 9.463000000000001
|
158 |
-
- type: precision_at_100
|
159 |
-
value: 0.9979999999999999
|
160 |
-
- type: precision_at_1000
|
161 |
-
value: 0.101
|
162 |
-
- type: precision_at_3
|
163 |
-
value: 28.871999999999996
|
164 |
-
- type: precision_at_5
|
165 |
-
value: 18.019
|
166 |
-
- type: recall_at_1
|
167 |
-
value: 70.39
|
168 |
-
- type: recall_at_10
|
169 |
-
value: 93.572
|
170 |
-
- type: recall_at_100
|
171 |
-
value: 98.736
|
172 |
-
- type: recall_at_1000
|
173 |
-
value: 99.895
|
174 |
-
- type: recall_at_3
|
175 |
-
value: 86.091
|
176 |
-
- type: recall_at_5
|
177 |
-
value: 89.384
|
178 |
-
- task:
|
179 |
-
type: Retrieval
|
180 |
-
dataset:
|
181 |
-
type: C_MTEB/DuRetrieval
|
182 |
-
name: MTEB DuRetrieval
|
183 |
-
config: default
|
184 |
-
split: dev
|
185 |
-
revision: None
|
186 |
-
metrics:
|
187 |
-
- type: map_at_1
|
188 |
-
value: 26.147
|
189 |
-
- type: map_at_10
|
190 |
-
value: 80.205
|
191 |
-
- type: map_at_100
|
192 |
-
value: 82.96
|
193 |
-
- type: map_at_1000
|
194 |
-
value: 82.999
|
195 |
-
- type: map_at_3
|
196 |
-
value: 55.16799999999999
|
197 |
-
- type: map_at_5
|
198 |
-
value: 69.798
|
199 |
-
- type: mrr_at_1
|
200 |
-
value: 89.8
|
201 |
-
- type: mrr_at_10
|
202 |
-
value: 93.16799999999999
|
203 |
-
- type: mrr_at_100
|
204 |
-
value: 93.22500000000001
|
205 |
-
- type: mrr_at_1000
|
206 |
-
value: 93.228
|
207 |
-
- type: mrr_at_3
|
208 |
-
value: 92.85
|
209 |
-
- type: mrr_at_5
|
210 |
-
value: 93.067
|
211 |
-
- type: ndcg_at_1
|
212 |
-
value: 89.8
|
213 |
-
- type: ndcg_at_10
|
214 |
-
value: 87.668
|
215 |
-
- type: ndcg_at_100
|
216 |
-
value: 90.16
|
217 |
-
- type: ndcg_at_1000
|
218 |
-
value: 90.505
|
219 |
-
- type: ndcg_at_3
|
220 |
-
value: 85.842
|
221 |
-
- type: ndcg_at_5
|
222 |
-
value: 85.101
|
223 |
-
- type: precision_at_1
|
224 |
-
value: 89.8
|
225 |
-
- type: precision_at_10
|
226 |
-
value: 42.225
|
227 |
-
- type: precision_at_100
|
228 |
-
value: 4.8149999999999995
|
229 |
-
- type: precision_at_1000
|
230 |
-
value: 0.48900000000000005
|
231 |
-
- type: precision_at_3
|
232 |
-
value: 76.967
|
233 |
-
- type: precision_at_5
|
234 |
-
value: 65.32
|
235 |
-
- type: recall_at_1
|
236 |
-
value: 26.147
|
237 |
-
- type: recall_at_10
|
238 |
-
value: 89.30399999999999
|
239 |
-
- type: recall_at_100
|
240 |
-
value: 97.609
|
241 |
-
- type: recall_at_1000
|
242 |
-
value: 99.409
|
243 |
-
- type: recall_at_3
|
244 |
-
value: 57.56
|
245 |
-
- type: recall_at_5
|
246 |
-
value: 74.78200000000001
|
247 |
-
- task:
|
248 |
-
type: Retrieval
|
249 |
-
dataset:
|
250 |
-
type: C_MTEB/EcomRetrieval
|
251 |
-
name: MTEB EcomRetrieval
|
252 |
-
config: default
|
253 |
-
split: dev
|
254 |
-
revision: None
|
255 |
-
metrics:
|
256 |
-
- type: map_at_1
|
257 |
-
value: 53.300000000000004
|
258 |
-
- type: map_at_10
|
259 |
-
value: 62.507000000000005
|
260 |
-
- type: map_at_100
|
261 |
-
value: 63.068000000000005
|
262 |
-
- type: map_at_1000
|
263 |
-
value: 63.08200000000001
|
264 |
-
- type: map_at_3
|
265 |
-
value: 60.050000000000004
|
266 |
-
- type: map_at_5
|
267 |
-
value: 61.41
|
268 |
-
- type: mrr_at_1
|
269 |
-
value: 53.300000000000004
|
270 |
-
- type: mrr_at_10
|
271 |
-
value: 62.507000000000005
|
272 |
-
- type: mrr_at_100
|
273 |
-
value: 63.068000000000005
|
274 |
-
- type: mrr_at_1000
|
275 |
-
value: 63.08200000000001
|
276 |
-
- type: mrr_at_3
|
277 |
-
value: 60.050000000000004
|
278 |
-
- type: mrr_at_5
|
279 |
-
value: 61.41
|
280 |
-
- type: ndcg_at_1
|
281 |
-
value: 53.300000000000004
|
282 |
-
- type: ndcg_at_10
|
283 |
-
value: 67.31700000000001
|
284 |
-
- type: ndcg_at_100
|
285 |
-
value: 69.862
|
286 |
-
- type: ndcg_at_1000
|
287 |
-
value: 70.231
|
288 |
-
- type: ndcg_at_3
|
289 |
-
value: 62.222
|
290 |
-
- type: ndcg_at_5
|
291 |
-
value: 64.66300000000001
|
292 |
-
- type: precision_at_1
|
293 |
-
value: 53.300000000000004
|
294 |
-
- type: precision_at_10
|
295 |
-
value: 8.260000000000002
|
296 |
-
- type: precision_at_100
|
297 |
-
value: 0.941
|
298 |
-
- type: precision_at_1000
|
299 |
-
value: 0.097
|
300 |
-
- type: precision_at_3
|
301 |
-
value: 22.833000000000002
|
302 |
-
- type: precision_at_5
|
303 |
-
value: 14.879999999999999
|
304 |
-
- type: recall_at_1
|
305 |
-
value: 53.300000000000004
|
306 |
-
- type: recall_at_10
|
307 |
-
value: 82.6
|
308 |
-
- type: recall_at_100
|
309 |
-
value: 94.1
|
310 |
-
- type: recall_at_1000
|
311 |
-
value: 97.0
|
312 |
-
- type: recall_at_3
|
313 |
-
value: 68.5
|
314 |
-
- type: recall_at_5
|
315 |
-
value: 74.4
|
316 |
-
- task:
|
317 |
-
type: Retrieval
|
318 |
-
dataset:
|
319 |
-
type: C_MTEB/MMarcoRetrieval
|
320 |
-
name: MTEB MMarcoRetrieval
|
321 |
-
config: default
|
322 |
-
split: dev
|
323 |
-
revision: None
|
324 |
-
metrics:
|
325 |
-
- type: map_at_1
|
326 |
-
value: 70.68799999999999
|
327 |
-
- type: map_at_10
|
328 |
-
value: 79.28399999999999
|
329 |
-
- type: map_at_100
|
330 |
-
value: 79.537
|
331 |
-
- type: map_at_1000
|
332 |
-
value: 79.545
|
333 |
-
- type: map_at_3
|
334 |
-
value: 77.643
|
335 |
-
- type: map_at_5
|
336 |
-
value: 78.694
|
337 |
-
- type: mrr_at_1
|
338 |
-
value: 73.05199999999999
|
339 |
-
- type: mrr_at_10
|
340 |
-
value: 79.794
|
341 |
-
- type: mrr_at_100
|
342 |
-
value: 80.024
|
343 |
-
- type: mrr_at_1000
|
344 |
-
value: 80.03099999999999
|
345 |
-
- type: mrr_at_3
|
346 |
-
value: 78.441
|
347 |
-
- type: mrr_at_5
|
348 |
-
value: 79.29
|
349 |
-
- type: ndcg_at_1
|
350 |
-
value: 73.05199999999999
|
351 |
-
- type: ndcg_at_10
|
352 |
-
value: 82.627
|
353 |
-
- type: ndcg_at_100
|
354 |
-
value: 83.737
|
355 |
-
- type: ndcg_at_1000
|
356 |
-
value: 83.946
|
357 |
-
- type: ndcg_at_3
|
358 |
-
value: 79.585
|
359 |
-
- type: ndcg_at_5
|
360 |
-
value: 81.306
|
361 |
-
- type: precision_at_1
|
362 |
-
value: 73.05199999999999
|
363 |
-
- type: precision_at_10
|
364 |
-
value: 9.835
|
365 |
-
- type: precision_at_100
|
366 |
-
value: 1.038
|
367 |
-
- type: precision_at_1000
|
368 |
-
value: 0.106
|
369 |
-
- type: precision_at_3
|
370 |
-
value: 29.756
|
371 |
-
- type: precision_at_5
|
372 |
-
value: 18.788
|
373 |
-
- type: recall_at_1
|
374 |
-
value: 70.68799999999999
|
375 |
-
- type: recall_at_10
|
376 |
-
value: 92.38300000000001
|
377 |
-
- type: recall_at_100
|
378 |
-
value: 97.347
|
379 |
-
- type: recall_at_1000
|
380 |
-
value: 98.992
|
381 |
-
- type: recall_at_3
|
382 |
-
value: 84.37
|
383 |
-
- type: recall_at_5
|
384 |
-
value: 88.434
|
385 |
-
- task:
|
386 |
-
type: Retrieval
|
387 |
-
dataset:
|
388 |
-
type: C_MTEB/MedicalRetrieval
|
389 |
-
name: MTEB MedicalRetrieval
|
390 |
-
config: default
|
391 |
-
split: dev
|
392 |
-
revision: None
|
393 |
-
metrics:
|
394 |
-
- type: map_at_1
|
395 |
-
value: 53.1
|
396 |
-
- type: map_at_10
|
397 |
-
value: 58.36599999999999
|
398 |
-
- type: map_at_100
|
399 |
-
value: 58.939
|
400 |
-
- type: map_at_1000
|
401 |
-
value: 58.99100000000001
|
402 |
-
- type: map_at_3
|
403 |
-
value: 57.15
|
404 |
-
- type: map_at_5
|
405 |
-
value: 57.794999999999995
|
406 |
-
- type: mrr_at_1
|
407 |
-
value: 53.2
|
408 |
-
- type: mrr_at_10
|
409 |
-
value: 58.416000000000004
|
410 |
-
- type: mrr_at_100
|
411 |
-
value: 58.989999999999995
|
412 |
-
- type: mrr_at_1000
|
413 |
-
value: 59.041
|
414 |
-
- type: mrr_at_3
|
415 |
-
value: 57.199999999999996
|
416 |
-
- type: mrr_at_5
|
417 |
-
value: 57.845
|
418 |
-
- type: ndcg_at_1
|
419 |
-
value: 53.1
|
420 |
-
- type: ndcg_at_10
|
421 |
-
value: 60.989000000000004
|
422 |
-
- type: ndcg_at_100
|
423 |
-
value: 63.967
|
424 |
-
- type: ndcg_at_1000
|
425 |
-
value: 65.436
|
426 |
-
- type: ndcg_at_3
|
427 |
-
value: 58.425000000000004
|
428 |
-
- type: ndcg_at_5
|
429 |
-
value: 59.583
|
430 |
-
- type: precision_at_1
|
431 |
-
value: 53.1
|
432 |
-
- type: precision_at_10
|
433 |
-
value: 6.93
|
434 |
-
- type: precision_at_100
|
435 |
-
value: 0.8370000000000001
|
436 |
-
- type: precision_at_1000
|
437 |
-
value: 0.096
|
438 |
-
- type: precision_at_3
|
439 |
-
value: 20.7
|
440 |
-
- type: precision_at_5
|
441 |
-
value: 12.98
|
442 |
-
- type: recall_at_1
|
443 |
-
value: 53.1
|
444 |
-
- type: recall_at_10
|
445 |
-
value: 69.3
|
446 |
-
- type: recall_at_100
|
447 |
-
value: 83.7
|
448 |
-
- type: recall_at_1000
|
449 |
-
value: 95.5
|
450 |
-
- type: recall_at_3
|
451 |
-
value: 62.1
|
452 |
-
- type: recall_at_5
|
453 |
-
value: 64.9
|
454 |
-
- task:
|
455 |
-
type: Reranking
|
456 |
-
dataset:
|
457 |
-
type: C-MTEB/Mmarco-reranking
|
458 |
-
name: MTEB MMarcoReranking
|
459 |
-
config: default
|
460 |
-
split: dev
|
461 |
-
revision: None
|
462 |
-
metrics:
|
463 |
-
- type: map
|
464 |
-
value: 33.548800108363665
|
465 |
-
- type: mrr
|
466 |
-
value: 32.529761904761905
|
467 |
-
- task:
|
468 |
-
type: Reranking
|
469 |
-
dataset:
|
470 |
-
type: C-MTEB/T2Reranking
|
471 |
-
name: MTEB T2Reranking
|
472 |
-
config: default
|
473 |
-
split: dev
|
474 |
-
revision: None
|
475 |
-
metrics:
|
476 |
-
- type: map
|
477 |
-
value: 69.43381583724414
|
478 |
-
- type: mrr
|
479 |
-
value: 80.47879657392181
|
480 |
-
- task:
|
481 |
-
type: Retrieval
|
482 |
-
dataset:
|
483 |
-
type: C_MTEB/T2Retrieval
|
484 |
-
name: MTEB T2Retrieval
|
485 |
-
config: default
|
486 |
-
split: dev
|
487 |
-
revision: None
|
488 |
-
metrics:
|
489 |
-
- type: map_at_1
|
490 |
-
value: 28.116000000000003
|
491 |
-
- type: map_at_10
|
492 |
-
value: 80.026
|
493 |
-
- type: map_at_100
|
494 |
-
value: 83.541
|
495 |
-
- type: map_at_1000
|
496 |
-
value: 83.592
|
497 |
-
- type: map_at_3
|
498 |
-
value: 56.092
|
499 |
-
- type: map_at_5
|
500 |
-
value: 69.114
|
501 |
-
- type: mrr_at_1
|
502 |
-
value: 91.557
|
503 |
-
- type: mrr_at_10
|
504 |
-
value: 93.73700000000001
|
505 |
-
- type: mrr_at_100
|
506 |
-
value: 93.808
|
507 |
-
- type: mrr_at_1000
|
508 |
-
value: 93.811
|
509 |
-
- type: mrr_at_3
|
510 |
-
value: 93.384
|
511 |
-
- type: mrr_at_5
|
512 |
-
value: 93.614
|
513 |
-
- type: ndcg_at_1
|
514 |
-
value: 91.553
|
515 |
-
- type: ndcg_at_10
|
516 |
-
value: 87.003
|
517 |
-
- type: ndcg_at_100
|
518 |
-
value: 90.128
|
519 |
-
- type: ndcg_at_1000
|
520 |
-
value: 90.615
|
521 |
-
- type: ndcg_at_3
|
522 |
-
value: 88.205
|
523 |
-
- type: ndcg_at_5
|
524 |
-
value: 86.978
|
525 |
-
- type: precision_at_1
|
526 |
-
value: 91.553
|
527 |
-
- type: precision_at_10
|
528 |
-
value: 43.25
|
529 |
-
- type: precision_at_100
|
530 |
-
value: 5.067
|
531 |
-
- type: precision_at_1000
|
532 |
-
value: 0.518
|
533 |
-
- type: precision_at_3
|
534 |
-
value: 77.25
|
535 |
-
- type: precision_at_5
|
536 |
-
value: 64.902
|
537 |
-
- type: recall_at_1
|
538 |
-
value: 28.116000000000003
|
539 |
-
- type: recall_at_10
|
540 |
-
value: 85.994
|
541 |
-
- type: recall_at_100
|
542 |
-
value: 96.345
|
543 |
-
- type: recall_at_1000
|
544 |
-
value: 98.867
|
545 |
-
- type: recall_at_3
|
546 |
-
value: 57.67099999999999
|
547 |
-
- type: recall_at_5
|
548 |
-
value: 72.26
|
549 |
-
- task:
|
550 |
-
type: Retrieval
|
551 |
-
dataset:
|
552 |
-
type: C_MTEB/VideoRetrieval
|
553 |
-
name: MTEB VideoRetrieval
|
554 |
-
config: default
|
555 |
-
split: dev
|
556 |
-
revision: None
|
557 |
-
metrics:
|
558 |
-
- type: map_at_1
|
559 |
-
value: 64.9
|
560 |
-
- type: map_at_10
|
561 |
-
value: 73.763
|
562 |
-
- type: map_at_100
|
563 |
-
value: 74.116
|
564 |
-
- type: map_at_1000
|
565 |
-
value: 74.12100000000001
|
566 |
-
- type: map_at_3
|
567 |
-
value: 72.15
|
568 |
-
- type: map_at_5
|
569 |
-
value: 73.25
|
570 |
-
- type: mrr_at_1
|
571 |
-
value: 64.9
|
572 |
-
- type: mrr_at_10
|
573 |
-
value: 73.763
|
574 |
-
- type: mrr_at_100
|
575 |
-
value: 74.116
|
576 |
-
- type: mrr_at_1000
|
577 |
-
value: 74.12100000000001
|
578 |
-
- type: mrr_at_3
|
579 |
-
value: 72.15
|
580 |
-
- type: mrr_at_5
|
581 |
-
value: 73.25
|
582 |
-
- type: ndcg_at_1
|
583 |
-
value: 64.9
|
584 |
-
- type: ndcg_at_10
|
585 |
-
value: 77.639
|
586 |
-
- type: ndcg_at_100
|
587 |
-
value: 79.396
|
588 |
-
- type: ndcg_at_1000
|
589 |
-
value: 79.554
|
590 |
-
- type: ndcg_at_3
|
591 |
-
value: 74.406
|
592 |
-
- type: ndcg_at_5
|
593 |
-
value: 76.385
|
594 |
-
- type: precision_at_1
|
595 |
-
value: 64.9
|
596 |
-
- type: precision_at_10
|
597 |
-
value: 8.959999999999999
|
598 |
-
- type: precision_at_100
|
599 |
-
value: 0.979
|
600 |
-
- type: precision_at_1000
|
601 |
-
value: 0.099
|
602 |
-
- type: precision_at_3
|
603 |
-
value: 26.967000000000002
|
604 |
-
- type: precision_at_5
|
605 |
-
value: 17.14
|
606 |
-
- type: recall_at_1
|
607 |
-
value: 64.9
|
608 |
-
- type: recall_at_10
|
609 |
-
value: 89.60000000000001
|
610 |
-
- type: recall_at_100
|
611 |
-
value: 97.89999999999999
|
612 |
-
- type: recall_at_1000
|
613 |
-
value: 99.2
|
614 |
-
- type: recall_at_3
|
615 |
-
value: 80.9
|
616 |
-
- type: recall_at_5
|
617 |
-
value: 85.7
|
618 |
-
---
|
619 |
license: apache-2.0
|
620 |
library_name: transformers
|
621 |
---
|
622 |
|
623 |
-
<h1 align="center">PEG: Towards Robust Text Retrieval with Progressive Learning</h1>
|
624 |
-
|
625 |
## Model Details
|
626 |
We propose the PEG model (a Progressively Learned Textual Embedding), which progressively adjusts the weights of samples contributing to the loss within an extremely large batch, based on the difficulty levels of negative samples.
|
627 |
We have amassed an extensive collection of over 110 million data samples, spanning a wide range of fields such as general knowledge, finance, tourism, medicine, and more.
|
628 |
|
629 |
-
Our technical report is available at [Paper](https://arxiv.org/pdf/2311.11691.pdf)
|
630 |
|
631 |
## Usage (HuggingFace Transformers)
|
632 |
|
@@ -654,24 +42,4 @@ with torch.no_grad():
|
|
654 |
embeddings = last_hidden_state[:, 0]
|
655 |
print("embeddings:")
|
656 |
print(embeddings)
|
657 |
-
```
|
658 |
-
|
659 |
-
## Contact
|
660 |
-
If you have any questions or suggestions related to this project, feel free to open an issue or pull request.
|
661 |
-
You can also email Tong Wu (townswu@tencent.com).
|
662 |
-
|
663 |
-
|
664 |
-
## Citation
|
665 |
-
|
666 |
-
If you find our work helpful for your research, please consider citing the following BibTeX entry:
|
667 |
-
|
668 |
-
```
|
669 |
-
|
670 |
-
@article{wu2023towards,
|
671 |
-
title={Towards Robust Text Retrieval with Progressive Learning},
|
672 |
-
author={Wu, Tong and Qin, Yulei and Zhang, Enwei and Xu, Zihan and Gao, Yuting and Li, Ke and Sun, Xing},
|
673 |
-
journal={arXiv preprint arXiv:2311.11691},
|
674 |
-
year={2023}
|
675 |
-
}
|
676 |
-
|
677 |
```
|
|
|
7 |
- feature-extraction
|
8 |
- sentence-similarity
|
9 |
- transformers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
license: apache-2.0
|
11 |
library_name: transformers
|
12 |
---
|
13 |
|
|
|
|
|
14 |
## Model Details
|
15 |
We propose the PEG model (a Progressively Learned Textual Embedding), which progressively adjusts the weights of samples contributing to the loss within an extremely large batch, based on the difficulty levels of negative samples.
|
16 |
We have amassed an extensive collection of over 110 million data samples, spanning a wide range of fields such as general knowledge, finance, tourism, medicine, and more.
|
17 |
|
|
|
18 |
|
19 |
## Usage (HuggingFace Transformers)
|
20 |
|
|
|
42 |
embeddings = last_hidden_state[:, 0]
|
43 |
print("embeddings:")
|
44 |
print(embeddings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
```
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"BertModel"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/mnt/data/townswu/ckpts/BAAI-bge-large-zh",
|
3 |
"architectures": [
|
4 |
"BertModel"
|
5 |
],
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04b94b1bf0b3e8d8c5f0560f7778975afffff6ee2fc96628be1a4bb999ad2845
|
3 |
+
size 1302218477
|