Spaces:
Runtime error
Runtime error
from apscheduler.schedulers.background import BackgroundScheduler | |
from model_types import MODEL_TYPES, ModelType | |
from huggingface_hub import HfApi | |
import matplotlib.pyplot as plt | |
import bar_chart_race as bcr | |
import pandas as pd | |
import gradio as gr | |
import os | |
# MODEL_SIZES = pd.read_pickle( | |
# "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/resolve/main/model_size_cache.pkl" | |
# ) | |
# Read the raw leaderboard history. The code below relies on the columns
# "date", "model" and "score".
open_llm_race_dataset = pd.read_csv("open_llm_race_dataset.csv")

# Resample every model to a daily frequency: keep the last score seen in each
# daily bin and carry it forward over days without a new entry.
# The `how=` and `fill_method=` keywords of `resample` were removed from
# pandas, so the aggregation and the forward-fill are explicit steps here.
open_llm_race_dataset["date"] = pd.to_datetime(open_llm_race_dataset["date"])
open_llm_race_dataset = (
    open_llm_race_dataset.set_index("date")
    .groupby("model")["score"]
    .resample("D", closed="right")
    .last()
    # forward-fill within each model so a score never leaks across models
    .groupby(level="model")
    .ffill()
    .reset_index()
)

# Keep only the period shown in the race. Dates are formatted as ISO
# "YYYY-MM-DD" strings, which sort chronologically, so a string comparison
# is sufficient.
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")
open_llm_race_dataset = open_llm_race_dataset[
    open_llm_race_dataset["date"] >= "2023-07-10"
]
open_llm_race_dataset = open_llm_race_dataset[["date", "score", "model"]]

# Drop rows with missing values, then keep a single row per (model, date).
# Reassigning (instead of `inplace=True`) avoids mutating a slice of the
# previous frame.
open_llm_race_dataset = open_llm_race_dataset.dropna().drop_duplicates(
    subset=["model", "date"]
)

# Add the model type; models absent from MODEL_TYPES are labelled Unknown.
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].apply(
    lambda model: (
        MODEL_TYPES[model].name if model in MODEL_TYPES else ModelType.Unknown.name
    )
)

# # add the model size
# open_llm_race_dataset["size"] = open_llm_race_dataset["model"].apply(
#     lambda x: MODEL_SIZES[x] if x in MODEL_SIZES else None
# )
def _render_race_tab(label, model_type_name):
    """Add one tab containing the bar chart race for a single model type.

    Must be called while a ``gr.Tabs()`` context is open.

    Args:
        label: Title displayed on the tab.
        model_type_name: Value of the "type" column selecting the models to
            plot.
    """
    with gr.TabItem(label=label):
        subset = open_llm_race_dataset[
            open_llm_race_dataset["type"] == model_type_name
        ]
        if subset.empty:
            # Nothing to animate; show a placeholder instead of failing.
            gr.HTML("<p>No data available for this category.</p>")
            return

        # Wide format for the race: one row per date, one column per model.
        # Models without a score on a given date are plotted as 0.
        wide = subset.pivot(index="date", columns="model", values="score").fillna(0)

        fig, ax = plt.subplots(figsize=(12, 6))
        ax.set_xlim(0, 100)
        race = bcr.bar_chart_race(
            wide,
            n_bars=10,
            fixed_max=True,
            period_length=1000,
            steps_per_period=20,
            end_period_pause=100,
            bar_texttemplate="{x:.2f}",
            filter_column_colors=True,
            fig=fig,
        )
        # `.data` is read from the returned object and embedded as raw HTML.
        gr.HTML(race.data)


# Demo interface
demo = gr.Blocks()
with demo:
    # leaderboard title
    gr.HTML("<h1>Open-LLM Race 🏃‍♂️</h1>")
    with gr.Tabs():
        _render_race_tab("Pretrained Models", ModelType.PT.name)
        _render_race_tab("Instruction Finetuned Models", ModelType.IFT.name)
        # This tab previously filtered on ModelType.IFT and therefore
        # duplicated the instruction-finetuned tab.
        # NOTE(review): assumes ModelType defines an `RL` member - confirm
        # against model_types.py.
        _render_race_tab("RLHF Models", ModelType.RL.name)
        # The "Finetuned Models" tab (ModelType.FT) is currently disabled:
        # _render_race_tab("Finetuned Models", ModelType.FT.name)
def restart_space():
    """Restart this Hugging Face Space through the Hub API.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed as the token when the variable is unset.
    """
    HfApi().restart_space(
        # The Hub API expects a "<namespace>/<name>" repo id, not the Space URL.
        repo_id="IlyasMoutawwakil/llm-bar-race",
        token=os.environ.get("HF_TOKEN"),
    )
# Run restart_space in the background once every 3600 seconds (one hour).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()

# Enable request queuing on the demo, then start serving it.
queued_demo = demo.queue(concurrency_count=10)
queued_demo.launch()