Muennighoff committed on
Commit
e556bec
•
1 Parent(s): 4e8ec86
Files changed (1) hide show
  1. app.py +18 -15
app.py CHANGED
@@ -279,10 +279,10 @@ EXTERNAL_MODELS = [
279
  "allenai-specter",
280
  "bert-base-swedish-cased",
281
  "bert-base-uncased",
282
- "bge-base-zh",
283
- "bge-large-zh",
284
  "bge-large-zh-noinstruct",
285
- "bge-small-zh",
286
  "contriever-base-msmarco",
287
  "cross-en-de-roberta-sentence-transformer",
288
  "dfm-encoder-large-v1",
@@ -355,10 +355,10 @@ EXTERNAL_MODEL_TO_LINK = {
355
  "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
356
  "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
357
  "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
358
- "bge-base-zh": "https://huggingface.co/BAAI/bge-base-zh",
359
- "bge-large-zh": "https://huggingface.co/BAAI/bge-large-zh",
360
  "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
361
- "bge-small-zh": "https://huggingface.co/BAAI/bge-small-zh",
362
  "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
363
  "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
364
  "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
@@ -431,10 +431,10 @@ EXTERNAL_MODEL_TO_DIM = {
431
  "allenai-specter": 768,
432
  "bert-base-swedish-cased": 768,
433
  "bert-base-uncased": 768,
434
- "bge-base-zh": 768,
435
- "bge-large-zh": 1024,
436
  "bge-large-zh-noinstruct": 1024,
437
- "bge-small-zh": 512,
438
  "contriever-base-msmarco": 768,
439
  "cross-en-de-roberta-sentence-transformer": 768,
440
  "DanskBERT": 768,
@@ -507,10 +507,10 @@ EXTERNAL_MODEL_TO_SEQLEN = {
507
  "allenai-specter": 512,
508
  "bert-base-swedish-cased": 512,
509
  "bert-base-uncased": 512,
510
- "bge-base-zh": 512,
511
- "bge-large-zh": 512,
512
  "bge-large-zh-noinstruct": 512,
513
- "bge-small-zh": 512,
514
  "contriever-base-msmarco": 512,
515
  "cross-en-de-roberta-sentence-transformer": 514,
516
  "DanskBERT": 514,
@@ -583,10 +583,10 @@ EXTERNAL_MODEL_TO_SIZE = {
583
  "all-mpnet-base-v2": 0.44,
584
  "bert-base-uncased": 0.44,
585
  "bert-base-swedish-cased": 0.50,
586
- "bge-base-zh": 0.41,
587
- "bge-large-zh": 1.30,
588
  "bge-large-zh-noinstruct": 1.30,
589
- "bge-small-zh": 0.10,
590
  "cross-en-de-roberta-sentence-transformer": 1.11,
591
  "contriever-base-msmarco": 0.44,
592
  "DanskBERT": 0.50,
@@ -675,6 +675,9 @@ MODELS_TO_SKIP = {
675
  "kozistr/fused-large-en",
676
  "sionic-ai/sionic-ai-v2", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
677
  "sionic-ai/sionic-ai-v1", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
 
 
 
678
  }
679
 
680
  EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
 
279
  "allenai-specter",
280
  "bert-base-swedish-cased",
281
  "bert-base-uncased",
282
+ "bge-base-zh-v1.5",
283
+ "bge-large-zh-v1.5",
284
  "bge-large-zh-noinstruct",
285
+ "bge-small-zh-v1.5",
286
  "contriever-base-msmarco",
287
  "cross-en-de-roberta-sentence-transformer",
288
  "dfm-encoder-large-v1",
 
355
  "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
356
  "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
357
  "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
358
+ "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
359
+ "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
360
  "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
361
+ "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
362
  "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
363
  "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
364
  "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
 
431
  "allenai-specter": 768,
432
  "bert-base-swedish-cased": 768,
433
  "bert-base-uncased": 768,
434
+ "bge-base-zh-v1.5": 768,
435
+ "bge-large-zh-v1.5": 1024,
436
  "bge-large-zh-noinstruct": 1024,
437
+ "bge-small-zh-v1.5": 512,
438
  "contriever-base-msmarco": 768,
439
  "cross-en-de-roberta-sentence-transformer": 768,
440
  "DanskBERT": 768,
 
507
  "allenai-specter": 512,
508
  "bert-base-swedish-cased": 512,
509
  "bert-base-uncased": 512,
510
+ "bge-base-zh-v1.5": 512,
511
+ "bge-large-zh-v1.5": 512,
512
  "bge-large-zh-noinstruct": 512,
513
+ "bge-small-zh-v1.5": 512,
514
  "contriever-base-msmarco": 512,
515
  "cross-en-de-roberta-sentence-transformer": 514,
516
  "DanskBERT": 514,
 
583
  "all-mpnet-base-v2": 0.44,
584
  "bert-base-uncased": 0.44,
585
  "bert-base-swedish-cased": 0.50,
586
+ "bge-base-zh-v1.5": 0.41,
587
+ "bge-large-zh-v1.5": 1.30,
588
  "bge-large-zh-noinstruct": 1.30,
589
+ "bge-small-zh-v1.5": 0.10,
590
  "cross-en-de-roberta-sentence-transformer": 1.11,
591
  "contriever-base-msmarco": 0.44,
592
  "DanskBERT": 0.50,
 
675
  "kozistr/fused-large-en",
676
  "sionic-ai/sionic-ai-v2", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
677
  "sionic-ai/sionic-ai-v1", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
678
+ "BAAI/bge-large-en", # Deprecated in favor of v1.5
679
+ "BAAI/bge-base-en", # Deprecated in favor of v1.5
680
+ "BAAI/bge-small-en", # Deprecated in favor of v1.5
681
  }
682
 
683
  EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}