Update README.md
README.md CHANGED
@@ -27,7 +27,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: inst_level_strict_acc and prompt_level_strict_acc
-      value:
+      value: 56.04
       name: strict accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
@@ -42,7 +42,7 @@ model-index:
         num_few_shot: 3
     metrics:
     - type: acc_norm
-      value:
+      value: 9.41
       name: normalized accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
@@ -57,7 +57,7 @@ model-index:
         num_few_shot: 4
     metrics:
     - type: exact_match
-      value:
+      value: 5.06
       name: exact match
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
@@ -72,7 +72,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: acc_norm
-      value:
+      value: 1.23
       name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
@@ -87,7 +87,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: acc_norm
-      value:
+      value: 1.11
      name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
@@ -104,7 +104,7 @@ model-index:
         num_few_shot: 5
     metrics:
     - type: acc
-      value:
+      value: 9.04
       name: accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
@@ -153,11 +153,11 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
 
 | Metric            |Value|
 |-------------------|----:|
-|Avg.               |     |
-|IFEval (0-Shot)    |     |
-|BBH (3-Shot)       |     |
-|MATH Lvl 5 (4-Shot)|     |
-|GPQA (0-shot)      |     |
-|MuSR (0-shot)      |     |
-|MMLU-PRO (5-shot)  |     |
+|Avg.               |13.65|
+|IFEval (0-Shot)    |56.04|
+|BBH (3-Shot)       | 9.41|
+|MATH Lvl 5 (4-Shot)| 5.06|
+|GPQA (0-shot)      | 1.23|
+|MuSR (0-shot)      | 1.11|
+|MMLU-PRO (5-shot)  | 9.04|
 
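As a quick sanity check on the new table (not part of the commit itself), the Avg. row should be the plain arithmetic mean of the six benchmark scores added above. A minimal Python sketch, assuming simple mean aggregation as used by the Open LLM Leaderboard:

```python
# Sanity check: Avg. should equal the mean of the six benchmark scores
# added in this commit (assumes a plain arithmetic mean, which matches
# the 13.65 reported in the table).
scores = {
    "IFEval (0-Shot)": 56.04,
    "BBH (3-Shot)": 9.41,
    "MATH Lvl 5 (4-Shot)": 5.06,
    "GPQA (0-shot)": 1.23,
    "MuSR (0-shot)": 1.11,
    "MMLU-PRO (5-shot)": 9.04,
}

avg = sum(scores.values()) / len(scores)
print(f"Avg. = {avg:.2f}")  # -> Avg. = 13.65
assert round(avg, 2) == 13.65
```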