MotzWanted's picture
Merge branch 'main' of https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard
0259587
raw
history blame
1.47 kB
import os
import torch
from dataclasses import dataclass
from enum import Enum
from src.envs import CACHE_PATH
@dataclass
class Task:
    """Describe a single benchmark tracked by this module.

    Attributes:
        benchmark: Benchmark identifier, e.g. ``"medmcqa"``.
        col_name: Human-readable label, e.g. ``"MedMCQA"`` (presumably the
            leaderboard column header -- confirm against the consumer).
        num_fewshot: Number of few-shot examples configured for the benchmark.
    """

    benchmark: str
    # NOTE: a per-task ``metric: str`` field was considered and deliberately
    # left out.
    col_name: str
    num_fewshot: int
class Tasks(Enum):
    """Enumerate the benchmarks known to this module.

    Each member's value is a ``Task`` built from the benchmark identifier,
    its display label, and its few-shot count (0 for every benchmark listed
    here).
    """

    task0 = Task("medmcqa", "MedMCQA", 0)
    task1 = Task("medqa_4options", "MedQA", 0)
    # MMLU subsets share the "<subject> (mmlu)" identifier pattern.
    task2 = Task("anatomy (mmlu)", "MMLU Anatomy", 0)
    task3 = Task("clinical_knowledge (mmlu)", "MMLU Clinical Knowledge", 0)
    task4 = Task("college_biology (mmlu)", "MMLU College Biology", 0)
    task5 = Task("college_medicine (mmlu)", "MMLU College Medicine", 0)
    task6 = Task("medical_genetics (mmlu)", "MMLU Medical Genetics", 0)
    task7 = Task("professional_medicine (mmlu)", "MMLU Professional Medicine", 0)
    task8 = Task("pubmedqa", "PubMedQA", 0)
# Few-shot example count per benchmark identifier. The keys mirror the
# benchmark names declared in ``Tasks``; every benchmark currently uses 0.
num_fewshots = dict.fromkeys(
    (
        "medmcqa",
        "medqa_4options",
        "anatomy (mmlu)",
        "clinical_knowledge (mmlu)",
        "college_biology (mmlu)",
        "college_medicine (mmlu)",
        "medical_genetics (mmlu)",
        "professional_medicine (mmlu)",
        "pubmedqa",
    ),
    0,
)
# NUM_FEWSHOT = 64 # Change with your few shot
# Backend working directories, both located directly under CACHE_PATH:
# one for the evaluation request queue, one for evaluation results.
EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND = (
    os.path.join(CACHE_PATH, subdir)
    for subdir in ("eval-queue-bk", "eval-results-bk")
)
# Select the torch device string: prefer CUDA, then Apple's MPS backend when
# it is actually available, and otherwise fall back to the CPU. Falling back
# to "mps" unconditionally yields an unusable device on hosts that have
# neither CUDA nor MPS (e.g. CPU-only Linux machines).
if torch.cuda.is_available():
    DEVICE = "cuda"
elif (
    # ``torch.backends.mps`` is absent in older torch releases.
    getattr(torch.backends, "mps", None) is not None
    and torch.backends.mps.is_available()
):
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# Testing override; needs to be None for real runs. (Presumably caps the
# number of evaluated samples -- confirm against the code that consumes it.)
LIMIT = None