Update README.md
Browse files
README.md
CHANGED
@@ -150,12 +150,12 @@ Which response is of higher overall quality in a medical context? Consider:
|
|
150 |
#### Elo Ratings
|
151 |
|Models|Elo Score|
|
152 |
|:---:|:---:|
|
153 |
-
|
154 |
|Llama3-70B-Instruct| 1643 |
|
155 |
|GPT4-o| 1426 |
|
156 |
|Llama3-8B-Instruct| 1352 |
|
157 |
|Mixtral-8x7b-Instruct| 970 |
|
158 |
-
|
159 |
|OpenBioLLM-70B| 657 |
|
160 |
|JSL-MedLlama-3-8B-v2.0| 447 |
|
161 |
|
@@ -170,12 +170,12 @@ Med42-v2 improves performance on every clinical benchmark compared to our previo
|
|
170 |
|
171 |
|Model|MMLU Pro|MMLU|MedMCQA|MedQA|USMLE|
|
172 |
|---:|:---:|:---:|:---:|:---:|:---:|
|
173 |
-
|
174 |
-
|
175 |
|OpenBioLLM|64.24|90.40|73.18|76.90|79.01|
|
176 |
|GPT-4.0<sup>†</sup>|-|87.00|69.50|78.90|84.05|
|
177 |
|MedGemini*|-|-|-|84.00|-|
|
178 |
-
|Med-PaLM-2(5-shot)*|-|87.77|71.30|79.70|-|
|
179 |
|Med42|-|76.72|60.90|61.50|71.85|
|
180 |
|ClinicalCamel-70B|-|69.75|47.00|53.40|54.30|
|
181 |
|GPT-3.5<sup>†</sup>|-|66.63|50.10|50.80|53.00|
|
@@ -204,8 +204,7 @@ We thank the Torch FSDP team for their robust distributed training framework, th
|
|
204 |
```
|
205 |
@article{christophe2024med42,
|
206 |
title={Med42-v2 - A Suite of Clinically-aligned Large Language Models},
|
207 |
-
author={Christophe, Cl{\'e}ment and Raha, Tathagata and Hayat, Nasir and Kanithi, Praveen and Al-Mahrooqi, Ahmed and Munjal, Prateek and Saadi, Nada and Javed, Hamza and Salman, Umar and Pimentel, Marco and Rajan, Ronnie and Khan, Shadab},
|
208 |
-
journal={M42},
|
209 |
year={2024}
|
210 |
}
|
211 |
```
|
|
|
150 |
#### Elo Ratings
|
151 |
|Models|Elo Score|
|
152 |
|:---:|:---:|
|
153 |
+
|**Med42-v2-70B**| 1764 |
|
154 |
|Llama3-70B-Instruct| 1643 |
|
155 |
|GPT-4o| 1426 |
|
156 |
|Llama3-8B-Instruct| 1352 |
|
157 |
|Mixtral-8x7B-Instruct| 970 |
|
158 |
+
|**Med42-v2-8B**| 924 |
|
159 |
|OpenBioLLM-70B| 657 |
|
160 |
|JSL-MedLlama-3-8B-v2.0| 447 |
|
161 |
|
|
|
170 |
|
171 |
|Model|MMLU Pro|MMLU|MedMCQA|MedQA|USMLE|
|
172 |
|---:|:---:|:---:|:---:|:---:|:---:|
|
173 |
+
|**Med42-v2-70B**|64.36|87.12|73.20|79.10|83.80|
|
174 |
+
|**Med42-v2-8B**|54.30|75.76|61.34|62.84|67.04|
|
175 |
|OpenBioLLM|64.24|90.40|73.18|76.90|79.01|
|
176 |
|GPT-4.0<sup>†</sup>|-|87.00|69.50|78.90|84.05|
|
177 |
|MedGemini*|-|-|-|84.00|-|
|
178 |
+
|Med-PaLM-2 (5-shot)*|-|87.77|71.30|79.70|-|
|
179 |
|Med42|-|76.72|60.90|61.50|71.85|
|
180 |
|ClinicalCamel-70B|-|69.75|47.00|53.40|54.30|
|
181 |
|GPT-3.5<sup>†</sup>|-|66.63|50.10|50.80|53.00|
|
|
|
204 |
```
|
205 |
@article{christophe2024med42,
|
206 |
title={Med42-v2 - A Suite of Clinically-aligned Large Language Models},
|
207 |
+
author={Christophe, Cl{\'e}ment and Raha, Tathagata and Hayat, Nasir and Kanithi, Praveen and Al-Mahrooqi, Ahmed and Munjal, Prateek and Saadi, Nada and Javed, Hamza and Salman, Umar and Maslenkova, Svetlana and Pimentel, Marco and Rajan, Ronnie and Khan, Shadab},
|
|
|
208 |
year={2024}
|
209 |
}
|
210 |
```
|