Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
·
277f064
1
Parent(s):
a647d17
update
Browse files
- backend-cli.py +1 -1
- src/backend/run_eval_suite.py +2 -2
- src/backend/tasks/__init__.py +0 -0
backend-cli.py
CHANGED
@@ -74,7 +74,7 @@ def request_to_result_name(request: EvalRequest) -> str:
|
|
74 |
|
75 |
def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
76 |
results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
|
77 |
-
batch_size=1, device=DEVICE,
|
78 |
|
79 |
dumped = json.dumps(results, indent=2)
|
80 |
print(dumped)
|
|
|
74 |
|
75 |
def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
76 |
results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
|
77 |
+
batch_size=1, device=DEVICE, use_cache=False, limit=LIMIT)
|
78 |
|
79 |
dumped = json.dumps(results, indent=2)
|
80 |
print(dumped)
|
src/backend/run_eval_suite.py
CHANGED
@@ -6,7 +6,7 @@ import logging
|
|
6 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
7 |
|
8 |
|
9 |
-
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device,
|
10 |
if limit:
|
11 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
12 |
|
@@ -17,7 +17,7 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
|
|
17 |
results = evaluator.simple_evaluate(model="hf-causal-experimental", # "hf-causal"
|
18 |
model_args=eval_request.get_model_args(),
|
19 |
tasks=task_names, num_fewshot=num_fewshot,
|
20 |
-
batch_size=batch_size, device=device,
|
21 |
limit=limit, write_out=True, output_base_path="logs")
|
22 |
|
23 |
results["config"]["model_dtype"] = eval_request.precision
|
|
|
6 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
7 |
|
8 |
|
9 |
+
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, use_cache=False, limit=None) -> dict:
|
10 |
if limit:
|
11 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
12 |
|
|
|
17 |
results = evaluator.simple_evaluate(model="hf-causal-experimental", # "hf-causal"
|
18 |
model_args=eval_request.get_model_args(),
|
19 |
tasks=task_names, num_fewshot=num_fewshot,
|
20 |
+
batch_size=batch_size, device=device, ise_cache=use_cache,
|
21 |
limit=limit, write_out=True, output_base_path="logs")
|
22 |
|
23 |
results["config"]["model_dtype"] = eval_request.precision
|
src/backend/tasks/__init__.py
ADDED
File without changes
|