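"""Build the evaluation-queue and leaderboard DataFrames from eval request/result files on disk."""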
import json
import os
import pandas as pd
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
from src.leaderboard.filter_models import filter_models_flags
from src.leaderboard.read_evals import get_raw_eval_results


def _load_json_data(file_path):
    """Safely load JSON data from a file."""
    try:
        with open(file_path, "r") as file:
            return json.load(file)
    except json.JSONDecodeError:
        print(f"Error reading JSON from {file_path}")
        return None  # Callers skip entries that fail to parse


def _process_model_data(entry, model_name_key="model", revision_key="revision"):
    """Enrich model data with clickable links and revisions."""
    entry[EvalQueueColumn.model.name] = make_clickable_model(entry.get(model_name_key, ""))
    entry[EvalQueueColumn.revision.name] = entry.get(revision_key, "main")
    return entry


def get_evaluation_queue_df(save_path, cols):
    """Generate dataframes for pending, running, and finished evaluation entries."""
    all_evals = []
    entries = os.listdir(save_path)
    for entry in entries:
        if entry.startswith(".") or entry.endswith(".md"):
            continue
        file_path = os.path.join(save_path, entry)
        if os.path.isfile(file_path):  # Check if it's a file
            data = _load_json_data(file_path)
            if data:
                all_evals.append(_process_model_data(data))
        else:
            # Queue entries may be grouped in per-organization subdirectories; scan one level down
            sub_entries = os.listdir(file_path)
            for sub_entry in sub_entries:
                sub_file_path = os.path.join(file_path, sub_entry)
                if os.path.isfile(sub_file_path):
                    data = _load_json_data(sub_file_path)
                    if data:
                        all_evals.append(_process_model_data(data))

    # Group entries by displayed status: RERUN counts as pending, PENDING_NEW_EVAL as finished
    status_map = {
        "PENDING": ["PENDING", "RERUN"],
        "RUNNING": ["RUNNING"],
        "FINISHED": ["FINISHED", "PENDING_NEW_EVAL"],
    }
    status_dfs = {status: [] for status in status_map}
    for eval_data in all_evals:
        for status, extra_statuses in status_map.items():
            if eval_data["status"] in extra_statuses:
                status_dfs[status].append(eval_data)

    return tuple(pd.DataFrame(status_dfs[status], columns=cols) for status in ["FINISHED", "RUNNING", "PENDING"])


def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmark_cols):
    """Retrieve and process leaderboard data."""
    raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
    all_data_json = [model.to_dict() for model in raw_data] + [baseline_row]
    filter_models_flags(all_data_json)

    df = pd.DataFrame.from_records(all_data_json)
    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
    df = df[cols].round(decimals=2)
    df = df[has_no_nan_values(df, benchmark_cols)]
    return raw_data, df
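

# Minimal usage sketch (illustrative, not part of the module API): the directory
# paths and column subsets below are assumptions for demonstration only; in the
# leaderboard app they come from its configuration and display utilities.
if __name__ == "__main__":
    # Hypothetical local checkouts of the requests/results/dynamic-info data.
    queue_path = "./eval-queue"
    results_path = "./eval-results"
    dynamic_path = "./dynamic-info"

    # Build the three queue tables; "status" is read directly from each JSON entry.
    eval_cols = [EvalQueueColumn.model.name, EvalQueueColumn.revision.name, "status"]
    finished_df, running_df, pending_df = get_evaluation_queue_df(queue_path, eval_cols)
    print(f"finished={len(finished_df)} running={len(running_df)} pending={len(pending_df)}")

    # Build the main leaderboard table, sorted by the average-score column.
    cols = [AutoEvalColumn.model.name, AutoEvalColumn.average.name]  # hypothetical column subset
    benchmark_cols = [AutoEvalColumn.average.name]  # hypothetical benchmark subset
    raw_data, leaderboard_df = get_leaderboard_df(results_path, queue_path, dynamic_path, cols, benchmark_cols)
    print(leaderboard_df.head())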