Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
•
f21645c
1
Parent(s):
7e267bf
update
Browse files
- cli/halueval-cli.py +3 -1
- src/backend/envs.py +2 -0
cli/halueval-cli.py
CHANGED
@@ -8,6 +8,8 @@ from src.backend.manage_requests import EvalRequest
|
|
8 |
from src.backend.run_eval_suite import run_evaluation
|
9 |
|
10 |
from src.backend.tasks.xsum.task import XSum
|
|
|
|
|
11 |
|
12 |
from lm_eval.tasks import initialize_tasks, include_task_folder
|
13 |
from lm_eval import tasks, evaluator, utils
|
@@ -31,7 +33,7 @@ def main():
|
|
31 |
eval_request = [r for r in eval_requests if 'bloom-560m' in r.model][0]
|
32 |
|
33 |
# my_task = Task("memo-trap", "acc", "memo-trap", 0)
|
34 |
-
my_task = Task("
|
35 |
|
36 |
TASKS_HARNESS = [my_task]
|
37 |
# task_names = ['triviaqa']
|
|
|
8 |
from src.backend.run_eval_suite import run_evaluation
|
9 |
|
10 |
from src.backend.tasks.xsum.task import XSum
|
11 |
+
from src.backend.tasks.cnndm.task import CNNDM
|
12 |
+
from src.backend.tasks.selfcheckgpt.task import SelfCheckGpt
|
13 |
|
14 |
from lm_eval.tasks import initialize_tasks, include_task_folder
|
15 |
from lm_eval import tasks, evaluator, utils
|
|
|
33 |
eval_request = [r for r in eval_requests if 'bloom-560m' in r.model][0]
|
34 |
|
35 |
# my_task = Task("memo-trap", "acc", "memo-trap", 0)
|
36 |
+
my_task = Task("selfcheckgpt", "avg-selfcheckgpt", "SGPT", 2)
|
37 |
|
38 |
TASKS_HARNESS = [my_task]
|
39 |
# task_names = ['triviaqa']
|
src/backend/envs.py
CHANGED
@@ -40,6 +40,8 @@ class Tasks(Enum):
|
|
40 |
task11 = Task("nq8", "em", "NQ Open 8", 8)
|
41 |
task12 = Task("tqa8", "em", "TriviaQA 8", 8)
|
42 |
|
|
|
|
|
43 |
# NUM_FEWSHOT = 64 # Change with your few shot
|
44 |
|
45 |
|
|
|
40 |
task11 = Task("nq8", "em", "NQ Open 8", 8)
|
41 |
task12 = Task("tqa8", "em", "TriviaQA 8", 8)
|
42 |
|
43 |
+
task13 = Task("ifeval", "inst_level_strict_acc", "IFEval", 0)
|
44 |
+
|
45 |
# NUM_FEWSHOT = 64 # Change with your few shot
|
46 |
|
47 |
|