Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
•
f5248c1
1
Parent(s):
f98b171
Better names for scripts
Browse files
scripts/{print_results.py → count_benchmark_items.py}
RENAMED
File without changes
|
scripts/read_score.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import os
|
3 |
-
import csv
|
4 |
-
|
5 |
-
folder = "nlp"
|
6 |
-
folders = os.listdir(folder)
|
7 |
-
out_csv = csv.writer(open("score.csv", "w", newline=""))
|
8 |
-
for model in folders:
|
9 |
-
tasks = os.listdir(folder+"/"+str(model))
|
10 |
-
scores = []
|
11 |
-
for task in tasks:
|
12 |
-
df = pd.read_json(folder+"/"+str(model)+"/"+str(task))
|
13 |
-
model_args = df['config']['model_args']
|
14 |
-
results=df['results']
|
15 |
-
keys = results.keys()
|
16 |
-
if str(keys[0]) == "truthfulqa_mc":
|
17 |
-
score = results=df['results'][keys[0]]['mc2']
|
18 |
-
else:
|
19 |
-
score = results=df['results'][keys[0]]['acc_norm']
|
20 |
-
num_fewshot = df['config']['num_fewshot']
|
21 |
-
scores.append(score)
|
22 |
-
out_csv.writerow([model] + scores)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|