File size: 2,399 Bytes
db1341d
aa37927
db1341d
aa37927
bdb77ab
561f24a
aa37927
233c78c
db1341d
 
 
 
 
bdb77ab
 
 
 
 
db1341d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561f24a
 
 
 
db1341d
 
 
aa37927
 
39cb364
db1341d
 
 
 
 
 
 
 
bdb77ab
 
 
 
 
db1341d
 
 
 
 
 
 
 
 
 
 
 
561f24a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import pandas as pd
import json

from src.display.utils import COLUMNS, EVAL_COLS, Tasks
from src.envs import EVAL_RESULTS_PATH

def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """Build the leaderboard table from the JSON result files in a directory.

    Each ``*.json`` file directly inside ``eval_results_path`` becomes one row:
    its ``config`` mapping and its ``results`` mapping are merged into a single
    flat dict (``results`` wins when both define the same key).

    Args:
        eval_results_path: Directory containing the result files. If it is not
            an existing directory, an empty table is returned.
        eval_requests_path: Not used by this function; kept so existing
            callers keep working.
        cols: Column names that are guaranteed to exist in the returned frame;
            columns absent from the loaded data are added and filled with
            ``None``.
        benchmark_cols: Not used by this function; kept so existing callers
            keep working.

    Returns:
        A ``pandas.DataFrame`` with one row per result file. When an
        ``average`` column is present it is coerced to numeric (unparseable
        values become NaN) and rows are sorted by it in descending order.

    Raises:
        json.JSONDecodeError: If a result file does not contain valid JSON.
    """
    rows = []

    # isdir rather than exists: a plain file at this path would otherwise make
    # os.listdir raise instead of yielding an empty leaderboard.
    if os.path.isdir(eval_results_path):
        # Sorted so the row order does not depend on filesystem listing order.
        for name in sorted(os.listdir(eval_results_path)):
            if not name.endswith('.json'):
                continue
            path = os.path.join(eval_results_path, name)
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            row = {}
            # `or {}` tolerates files where a section is present but null.
            row.update(data.get('config') or {})
            row.update(data.get('results') or {})
            rows.append(row)

    df = pd.DataFrame(rows) if rows else pd.DataFrame(columns=cols)

    # Ensure the frame exposes every requested column.
    for col in cols:
        if col not in df.columns:
            df[col] = None

    if 'average' in df.columns:
        df['average'] = pd.to_numeric(df['average'], errors='coerce')
        # mergesort is stable, so rows with equal averages keep filename order.
        df = df.sort_values(by=['average'], ascending=False, kind='mergesort')

    return df

def get_evaluation_queue_df(eval_requests_path, eval_cols):
    """Load evaluation requests and split them by their ``status`` value.

    Each ``*.json`` file directly inside ``eval_requests_path`` is parsed and
    becomes one row of an intermediate frame, which is then partitioned into
    the rows whose ``status`` equals ``'finished'``, ``'running'`` and
    ``'pending'``. Rows with any other (or no) status appear in none of the
    three frames.

    Args:
        eval_requests_path: Directory containing the request files. If it is
            not an existing directory, or holds no ``*.json`` files, three
            empty frames with ``eval_cols`` as columns are returned.
        eval_cols: Column names guaranteed to exist in every returned frame;
            columns absent from the loaded data are added and filled with
            ``None``.

    Returns:
        A tuple ``(finished_df, running_df, pending_df)`` of independent
        ``pandas.DataFrame`` objects.

    Raises:
        json.JSONDecodeError: If a request file does not contain valid JSON.
    """
    requests = []

    # isdir rather than exists: a plain file at this path would otherwise make
    # os.listdir raise instead of yielding empty queues.
    if os.path.isdir(eval_requests_path):
        # Sorted so the row order does not depend on filesystem listing order.
        for name in sorted(os.listdir(eval_requests_path)):
            if not name.endswith('.json'):
                continue
            path = os.path.join(eval_requests_path, name)
            with open(path, 'r', encoding='utf-8') as f:
                requests.append(json.load(f))

    if not requests:
        return (
            pd.DataFrame(columns=eval_cols),
            pd.DataFrame(columns=eval_cols),
            pd.DataFrame(columns=eval_cols),
        )

    df = pd.DataFrame(requests)

    # Keep the loaded frames consistent with the empty ones: every requested
    # column is present even if no request file defined it.
    for col in eval_cols:
        if col not in df.columns:
            df[col] = None

    # If no request carried a 'status' key, compare against an all-missing
    # series so every bucket comes out empty instead of raising KeyError.
    if 'status' in df.columns:
        status = df['status']
    else:
        status = pd.Series(index=df.index, dtype=object)

    # .copy() hands callers independent frames rather than slices of `df`.
    finished_df = df[status == 'finished'].copy()
    running_df = df[status == 'running'].copy()
    pending_df = df[status == 'pending'].copy()

    return finished_df, running_df, pending_df