|
import os |
|
import pandas as pd |
|
import json |
|
|
|
from src.display.utils import COLUMNS, EVAL_COLS, Tasks |
|
from src.envs import EVAL_RESULTS_PATH |
|
|
|
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """Build the leaderboard DataFrame from per-model JSON result files.

    Scans *eval_results_path* for ``*.json`` files, flattens each file's
    ``config`` and ``results`` sections into a single row, guarantees every
    column in *cols* exists, and sorts by the ``average`` score
    (descending) when that column is present.

    Args:
        eval_results_path: Directory containing JSON result files.
        eval_requests_path: Unused here; kept for interface compatibility
            with existing callers that pass both paths.
        cols: Column names the returned DataFrame must contain.
        benchmark_cols: Unused here; kept for interface compatibility.

    Returns:
        pandas.DataFrame containing at least the columns in *cols*; empty
        (but with the expected columns) when the directory is missing or
        holds no JSON files.
    """
    df = pd.DataFrame(columns=cols)

    if os.path.exists(eval_results_path):
        result_files = [
            os.path.join(eval_results_path, f)
            for f in os.listdir(eval_results_path)
            if f.endswith('.json')
        ]
        data_list = []
        for file in result_files:
            # Explicit encoding: the platform default can mis-decode
            # non-ASCII model names (e.g. on Windows).
            with open(file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            flattened_data = {}
            flattened_data.update(data.get('config', {}))
            flattened_data.update(data.get('results', {}))
            data_list.append(flattened_data)
        if data_list:
            df = pd.DataFrame(data_list)

    # Guarantee the expected schema even when some files omit fields.
    for col in cols:
        if col not in df.columns:
            df[col] = None

    # Single check instead of the original duplicated `if 'average' ...`
    # blocks: coerce to numeric (bad values become NaN), then rank
    # best-first.
    if 'average' in df.columns:
        df['average'] = pd.to_numeric(df['average'], errors='coerce')
        df = df.sort_values(by=['average'], ascending=False)

    return df
|
|
|
def get_evaluation_queue_df(eval_requests_path, eval_cols):
    """Split evaluation requests into finished / running / pending frames.

    Reads every ``*.json`` request file under *eval_requests_path* and
    partitions the resulting rows on their ``status`` field.

    Args:
        eval_requests_path: Directory containing JSON request files.
        eval_cols: Column names used for the empty frames returned when no
            requests are found.

    Returns:
        Tuple ``(finished_df, running_df, pending_df)``; each element is an
        empty DataFrame with *eval_cols* when there is nothing to show.
    """
    finished_df = pd.DataFrame(columns=eval_cols)
    running_df = pd.DataFrame(columns=eval_cols)
    pending_df = pd.DataFrame(columns=eval_cols)

    if os.path.exists(eval_requests_path):
        request_files = [
            os.path.join(eval_requests_path, f)
            for f in os.listdir(eval_requests_path)
            if f.endswith('.json')
        ]
        data_list = []
        for file in request_files:
            # Explicit encoding: avoid platform-default decoding surprises.
            with open(file, 'r', encoding='utf-8') as f:
                data_list.append(json.load(f))
        if data_list:
            df = pd.DataFrame(data_list)
            # Guard: the original raised KeyError when no request file
            # carried a 'status' field; fall back to the empty frames.
            if 'status' in df.columns:
                finished_df = df[df['status'] == 'finished']
                running_df = df[df['status'] == 'running']
                pending_df = df[df['status'] == 'pending']

    return finished_df, running_df, pending_df
|
|