import os
import json

import pandas as pd

from src.display.utils import COLUMNS, EVAL_COLS, Tasks
from src.envs import EVAL_RESULTS_PATH


def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """Build the leaderboard DataFrame from the evaluation result JSON files."""
    # Initialize an empty DataFrame
    df = pd.DataFrame(columns=cols)

    # Load evaluation results from JSON files
    if os.path.exists(eval_results_path):
        result_files = [
            os.path.join(eval_results_path, f)
            for f in os.listdir(eval_results_path)
            if f.endswith('.json')
        ]

        data_list = []
        for file in result_files:
            with open(file, 'r') as f:
                data = json.load(f)
            # Flatten the 'config' and 'results' sections into a single record
            flattened_data = {}
            flattened_data.update(data.get('config', {}))
            flattened_data.update(data.get('results', {}))
            data_list.append(flattened_data)

        if data_list:
            df = pd.DataFrame(data_list)

    # Ensure the DataFrame has all expected columns
    for col in cols:
        if col not in df.columns:
            df[col] = None

    # Convert the 'average' column to float (coercing errors to NaN)
    # and sort by it if present
    if 'average' in df.columns:
        df['average'] = pd.to_numeric(df['average'], errors='coerce')
        df = df.sort_values(by=['average'], ascending=False)

    return df


def get_evaluation_queue_df(eval_requests_path, eval_cols):
    """Split the evaluation requests into finished, running, and pending DataFrames."""
    # Initialize empty DataFrames
    finished_df = pd.DataFrame(columns=eval_cols)
    running_df = pd.DataFrame(columns=eval_cols)
    pending_df = pd.DataFrame(columns=eval_cols)

    # Load evaluation requests from JSON files
    if os.path.exists(eval_requests_path):
        request_files = [
            os.path.join(eval_requests_path, f)
            for f in os.listdir(eval_requests_path)
            if f.endswith('.json')
        ]

        data_list = []
        for file in request_files:
            with open(file, 'r') as f:
                data = json.load(f)
            data_list.append(data)

        if data_list:
            df = pd.DataFrame(data_list)
            # Split the DataFrame based on request status
            finished_df = df[df['status'] == 'finished']
            running_df = df[df['status'] == 'running']
            pending_df = df[df['status'] == 'pending']

    return finished_df, running_df, pending_df
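

# Usage sketch (not part of the original module): a minimal example of how the
# two helpers above might be called, e.g. from a leaderboard app. The paths and
# column lists below are hypothetical placeholders; a real app would take them
# from src.envs and src.display.utils (e.g. EVAL_RESULTS_PATH, COLUMNS, EVAL_COLS).
if __name__ == '__main__':
    # Hypothetical locations of the result and request JSON files.
    results_path = 'eval-results'
    requests_path = 'eval-queue'

    # Hypothetical column lists for the leaderboard and the request queue.
    cols = ['model', 'average', 'task_a', 'task_b']
    eval_cols = ['model', 'status', 'submitted_time']

    leaderboard_df = get_leaderboard_df(results_path, requests_path, cols, benchmark_cols=cols)
    finished_df, running_df, pending_df = get_evaluation_queue_df(requests_path, eval_cols)

    print(leaderboard_df.head())
    print(f"finished: {len(finished_df)}, running: {len(running_df)}, pending: {len(pending_df)}")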