File size: 3,260 Bytes
ad8e780
 
 
 
 
 
 
0bb31bc
ad8e780
 
73a04d8
 
 
e08462b
73a04d8
 
 
ad8e780
83abc20
 
 
 
 
 
0bb31bc
 
ad8e780
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73a04d8
6cd57e5
73a04d8
 
6cd57e5
3ee4539
73a04d8
 
 
 
 
6cd57e5
4b57226
73a04d8
 
 
 
 
 
6cd57e5
 
 
73a04d8
 
83abc20
73a04d8
ad8e780
 
 
 
 
0bb31bc
ad8e780
 
6a1592c
6cd57e5
6c57cb0
 
e3ec4e5
6cd57e5
ad8e780
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from apscheduler.schedulers.background import BackgroundScheduler
from model_types import MODEL_TYPES, ModelType
from huggingface_hub import HfApi
import matplotlib.pyplot as plt
import bar_chart_race as bcr
import pandas as pd
import gradio as gr
import requests
import os


def restart_space():
    """Ask the Hugging Face Hub to restart the ``llm-bar-race`` Space.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed when the variable is not set.
    """
    api = HfApi()
    hub_token = os.environ.get("HF_TOKEN", None)
    api.restart_space(repo_id="IlyasMoutawwakil/llm-bar-race", token=hub_token)


# Load the race dataset, preferring a local copy of the CSV over the Hub download.
if os.path.exists("open-llm-race-dataset.csv"):
    open_llm_race_dataset = pd.read_csv("open-llm-race-dataset.csv")
else:
    open_llm_race_dataset = pd.read_csv(
        "https://huggingface.co/datasets/IlyasMoutawwakil/open-llm-race-dataset/resolve/main/open-llm-race-dataset.csv"
    )


# Resample every model to a daily frequency, keeping the last row of each day.
# NOTE(review): this call used to pass how="last" and fill_method="ffill" as
# well. Those are no longer documented `resample` parameters and appear to be
# accepted-but-ignored on the groupby path, so they are not passed here. If
# forward-filling days without data was the intent, it needs an explicit
# per-model ffill -- confirm before adding one.
open_llm_race_dataset["date"] = pd.to_datetime(open_llm_race_dataset["date"])
open_llm_race_dataset = (
    open_llm_race_dataset.set_index("date", drop=True)
    .groupby("model", as_index=False)
    .resample("D", closed="right")
    .last()
    .reset_index(drop=False)
)
# Dates become zero-padded "YYYY-MM-DD" strings, so the string comparison
# used for the cutoff below orders them chronologically.
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")
open_llm_race_dataset = open_llm_race_dataset[
    open_llm_race_dataset["date"] >= "2023-07-10"
]
# Keep only the columns the charts use. Copy so that the column assignment
# further down writes to an independent frame rather than a slice.
open_llm_race_dataset = open_llm_race_dataset[["date", "score", "model"]].copy()
# drop nan values
open_llm_race_dataset = open_llm_race_dataset.dropna()
# drop duplicates on model and date
open_llm_race_dataset = open_llm_race_dataset.drop_duplicates(subset=["model", "date"])
# add the model type; models missing from MODEL_TYPES are labelled Unknown
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].apply(
    lambda x: MODEL_TYPES[x].name if x in MODEL_TYPES else ModelType.Unknown.name
)


def get_bar_chart(model_type: str, top_n: int = 10, title: str = ""):
    """Build a bar chart race for one model type and wrap it in ``gr.HTML``.

    Args:
        model_type: Value matched against the ``"type"`` column of the
            module-level ``open_llm_race_dataset`` frame.
        top_n: Number of bars shown at once (forwarded as ``n_bars``).
        title: Title drawn on the chart.

    Returns:
        A ``gr.HTML`` component holding whatever ``bcr.bar_chart_race``
        returned for the selected models.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.set_xlim(0, 100)
        plt.subplots_adjust(left=0.25)

        # One row per date, one column per model; a model with no score on a
        # given date is drawn with a value of 0.
        subset = open_llm_race_dataset[open_llm_race_dataset["type"] == model_type]
        subset = subset.pivot(index="date", columns="model", values="score")
        subset = subset.fillna(0)

        race_html = bcr.bar_chart_race(
            subset,
            title=title,
            n_bars=top_n,
            fixed_max=True,
            period_length=1000,
            steps_per_period=20,
            end_period_pause=100,
            bar_texttemplate="{x:.2f}",
            filter_column_colors=True,
            bar_label_font=10,
            tick_label_font=10,
            bar_kwargs={"alpha": 0.2, "ec": "black", "lw": 3},
            fig=fig,
        )
    finally:
        # pyplot keeps every figure it creates alive until it is closed
        # explicitly; release it once the animation has been rendered.
        plt.close(fig)
    return gr.HTML(race_html)


# Demo interface: a page title followed by one tab per model type.
with gr.Blocks() as demo:
    # Leaderboard title. The trailing emoji ("bar chart" and "man running")
    # are written as escapes so the zero-width joiner and variation selector
    # in the second one cannot be lost or mis-decoded by an editor.
    gr.HTML(
        "<center><h1>LLM Bar Race "
        "\U0001F4CA\U0001F3C3\u200D\u2642\uFE0F"
        "</h1></center>"
    )

    with gr.Tabs():
        with gr.TabItem(label="Pretrained Models"):
            get_bar_chart(ModelType.PT.name, title="Pretrained Models")
        with gr.TabItem(label="Instructions Finetuned Models"):
            get_bar_chart(ModelType.IFT.name, title="Instructions Finetuned Models")
        with gr.TabItem(label="RLHF Models"):
            get_bar_chart(ModelType.RL.name, top_n=4, title="RLHF Models")


# Restart the Space every hour (3600 seconds) via a background scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()

# Turn on the request queue, then start serving the demo.
queued_demo = demo.queue(concurrency_count=10)
queued_demo.launch()