Spaces:
Running
Running
add global timeout check
Browse files
utils.py
CHANGED
@@ -1,11 +1,35 @@
|
|
1 |
import itertools
|
|
|
|
|
2 |
import numpy as np
|
3 |
from typing import Dict
|
4 |
from datasets import load_dataset
|
5 |
from .testing_util import run_test
|
6 |
|
7 |
DATASET = "codeparrot/apps"
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def evaluate_generations(generations: list, level: str = "all", debug: bool = False):
|
11 |
"""We take the list of code generations and try to compile them
|
|
|
1 |
import itertools
|
2 |
+
import json
|
3 |
+
import multiprocessing
|
4 |
import numpy as np
|
5 |
from typing import Dict
|
6 |
from datasets import load_dataset
|
7 |
from .testing_util import run_test
|
8 |
|
9 |
DATASET = "codeparrot/apps"
|
10 |
+
TIMEOUT = 10
|
11 |
+
|
12 |
+
def check_correctness(sample, generation, timeout, debug=True):
|
13 |
+
"""Check correctness of code generation with a global timeout.
|
14 |
+
The global timeout is to catch some extreme/rare cases not handled by the timeouts
|
15 |
+
inside `run_test`"""
|
16 |
+
def _temp_run(sample, generation, debug, result):
|
17 |
+
result.append(run_test(sample, test=generation, debug=debug))
|
18 |
+
|
19 |
+
manager = multiprocessing.Manager()
|
20 |
+
result = manager.list()
|
21 |
+
p = multiprocessing.Process(target=_temp_run, args=(sample, generation, debug, result))
|
22 |
+
p.start()
|
23 |
+
p.join(timeout=timeout + 1)
|
24 |
+
if p.is_alive():
|
25 |
+
p.kill()
|
26 |
+
if not result:
|
27 |
+
in_outs = json.loads(sample["input_output"])
|
28 |
+
# consider that all tests failed
|
29 |
+
result = [[-1 for i in range(len(in_outs["inputs"]))]]
|
30 |
+
if debug:
|
31 |
+
print(f"global timeout")
|
32 |
+
return result[0]
|
33 |
|
34 |
def evaluate_generations(generations: list, level: str = "all", debug: bool = False):
|
35 |
"""We take the list of code generations and try to compile them
|