Spaces:
Running
Running
Zhiyu Wu
Zhiyu Wu
committed on
Commit
•
8595b18
1
Parent(s):
fddc51a
Add NLP evaluation metrics (#8)
Browse files
Co-authored-by: Zhiyu Wu <zhiyuwu@ampere01.eecs.umich.edu>
- data/2023-06-17/score.csv +21 -21
data/2023-06-17/score.csv
CHANGED
@@ -1,21 +1,21 @@
|
|
1 |
-
model,
|
2 |
-
lmsys/vicuna-7B,
|
3 |
-
lmsys/vicuna-13B,
|
4 |
-
tatsu-lab/alpaca-7B,
|
5 |
-
metaai/llama-7B,
|
6 |
-
metaai/llama-13B,
|
7 |
-
camel-ai/CAMEL-13B-Combined-Data,
|
8 |
-
BlinkDL/RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth,NaN
|
9 |
-
databricks/dolly-v2-12b,
|
10 |
-
FreedomIntelligence/phoenix-inst-chat-7b,
|
11 |
-
h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2,
|
12 |
-
lmsys/fastchat-t5-3b-v1.0,
|
13 |
-
Neutralzz/BiLLa-7B-SFT,
|
14 |
-
nomic-ai/gpt4all-13b-snoozy,
|
15 |
-
openaccess-ai-collective/manticore-13b-chat-pyg,
|
16 |
-
OpenAssistant/oasst-sft-1-pythia-12b,
|
17 |
-
project-baize/baize-v2-7B,
|
18 |
-
BAIR/koala-7b,
|
19 |
-
BAIR/koala-13b,
|
20 |
-
StabilityAI/stablelm-tuned-alpha-7b,
|
21 |
-
togethercomputer/RedPajama-INCITE-7B-Chat,
|
|
|
1 |
+
model,average,ARC (25-s),HellaSwag (10-s),TruthfulQA (MC) (0-s)
|
2 |
+
lmsys/vicuna-7B,60.0,53.5,77.5,49.0
|
3 |
+
lmsys/vicuna-13B,61.6,52.9,80.1,51.8
|
4 |
+
tatsu-lab/alpaca-7B,56.4,52.6,76.9,39.6
|
5 |
+
metaai/llama-7B,54.3,51.1,77.7,34.1
|
6 |
+
metaai/llama-13B,59.0,56.3,80.9,39.9
|
7 |
+
camel-ai/CAMEL-13B-Combined-Data,60.7,55.5,79.3,47.3
|
8 |
+
BlinkDL/RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth,NaN,NaN,NaN,NaN
|
9 |
+
databricks/dolly-v2-12b,49.1,42.2,71.8,33.4
|
10 |
+
FreedomIntelligence/phoenix-inst-chat-7b,51.8,45.0,63.2,47.1
|
11 |
+
h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2,45.5,36.9,61.6,37.9
|
12 |
+
lmsys/fastchat-t5-3b-v1.0,43.7,35.9,46.4,48.8
|
13 |
+
Neutralzz/BiLLa-7B-SFT,34.2,27.7,26.0,49.0
|
14 |
+
nomic-ai/gpt4all-13b-snoozy,61.1,56.1,78.7,48.4
|
15 |
+
openaccess-ai-collective/manticore-13b-chat-pyg,63.2,58.7,82.0,48.9
|
16 |
+
OpenAssistant/oasst-sft-1-pythia-12b,51.6,45.6,69.9,39.2
|
17 |
+
project-baize/baize-v2-7B,55.1,48.5,75.0,41.7
|
18 |
+
BAIR/koala-7b,55.6,47.1,73.7,46.0
|
19 |
+
BAIR/koala-13b,60.2,52.9,77.5,50.1
|
20 |
+
StabilityAI/stablelm-tuned-alpha-7b,41.9,31.9,53.6,40.2
|
21 |
+
togethercomputer/RedPajama-INCITE-7B-Chat,49.7,42.2,70.8,36.1
|