"""Backend evaluation settings: task definitions, cache paths and device."""

import os
from dataclasses import dataclass
from enum import Enum

import torch

from src.envs import CACHE_PATH


@dataclass
class Task:
    """Description of one evaluation task.

    The field meanings below are inferred from the values used in ``Tasks``
    and should be confirmed against the code that consumes them.
    """

    # Benchmark identifier, e.g. "medmcqa" (presumably the name the
    # evaluation harness knows the task by -- TODO confirm).
    benchmark: str
    # metric: str # yeah i don't think we need this.
    # Human-readable name, e.g. "MedMCQA" (presumably a results column
    # header -- TODO confirm).
    col_name: str
    # Number of few-shot examples for this task.
    num_fewshot: int


# how are these differentiated with Tasks in display/utils.py ?
class Tasks(Enum):
    """Tasks evaluated by the backend; each member's value is a ``Task``."""

    # Earlier task definitions, kept for reference (they used a four-field
    # Task that still carried a metric name):
    # task0 = Task("pubmedqa", "acc", "PubMedQA", 0)  # 64, as in the ATLAS paper
    # task1 = Task("hellaswag", "acc_norm", "HellaSwag", 0)  # 64, as in the ATLAS paper
    # task0 = Task("medqa", "acc_norm", "MedQA", 0)  # medqa_4options?
    # task0 = Task("medmcqa", "acc_norm", "MedMCQA", 0)
    # task1 = Task("pubmedqa", "acc", "PubMedQA", 0)

    task0 = Task("medmcqa", "MedMCQA", 0)
    task1 = Task("pubmedqa", "PubMedQA", 0)
    task2 = Task("pubmedqa_no_context", "PubMedQA_no_context", 0)
    task3 = Task("biolama_umls", "BioLAMA-UMLS", 0)


# Few-shot count per benchmark name. Note that "medqa" is listed here even
# though it has no corresponding member in ``Tasks``.
num_fewshots = {
    "medqa": 0,
    "medmcqa": 0,
    "pubmedqa": 0,
    "pubmedqa_no_context": 0,
    "biolama_umls": 0,
}

# NUM_FEWSHOT = 64 # Change with your few shot

EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")


def _select_device() -> str:
    """Return the torch device string to use: "cuda", "mps" or "cpu".

    CUDA is preferred when available. MPS is only chosen when torch reports
    it as available, so hosts with neither accelerator get "cpu" instead of
    an unusable "mps".
    """
    if torch.cuda.is_available():
        return "cuda"
    # ``torch.backends.mps`` is missing on older torch releases, so look it
    # up defensively rather than assuming the attribute exists.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"


DEVICE = _select_device()

LIMIT = None  # Testing; needs to be None