llm-bar-race / app.py
IlyasMoutawwakil's picture
init
ad8e780
raw
history blame
5.85 kB
from apscheduler.schedulers.background import BackgroundScheduler
from model_types import MODEL_TYPES, ModelType
from huggingface_hub import HfApi
import matplotlib.pyplot as plt
import bar_chart_race as bcr
import pandas as pd
import gradio as gr
import os
# MODEL_SIZES = pd.read_pickle(
#     "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/resolve/main/model_size_cache.pkl"
# )

# Read in the data. The steps below use its "date", "model" and "score"
# columns.
open_llm_race_dataset = pd.read_csv("open_llm_race_dataset.csv")
open_llm_race_dataset["date"] = pd.to_datetime(open_llm_race_dataset["date"])

# Resample every model to a daily frequency, keeping the last row of each day.
# The legacy `how="last"` / `fill_method="ffill"` keywords were dropped here:
# the aggregation is the explicit `.last()` call, and those keywords are not
# part of the documented resample API (newer pandas releases may reject them).
# NOTE(review): no forward-fill is applied, so days without a row for a model
# come out as NaN and are removed by the `dropna()` below - confirm that this
# is the intended behaviour.
open_llm_race_dataset = (
    open_llm_race_dataset.set_index("date", drop=True)
    .groupby("model", as_index=False)
    .resample("D", closed="right")
    .last()
    .reset_index(drop=False)
)

# Keep only the rows from 2023-07-10 onwards. Dates are turned into
# "YYYY-MM-DD" strings first; that format sorts lexicographically in
# chronological order, so a plain string comparison is enough.
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")
open_llm_race_dataset = open_llm_race_dataset[
    open_llm_race_dataset["date"] >= "2023-07-10"
]

# Keep the columns of interest, drop rows with NaN values, then drop
# duplicates on (model, date). Non-inplace calls are used so that the result
# is a fresh frame rather than a mutated slice of the filtered one.
open_llm_race_dataset = (
    open_llm_race_dataset[["date", "score", "model"]]
    .dropna()
    .drop_duplicates(subset=["model", "date"])
)

# Add the model type; models missing from MODEL_TYPES are tagged as Unknown.
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].apply(
    lambda x: MODEL_TYPES[x].name if x in MODEL_TYPES else ModelType.Unknown.name
)

# # add the model size
# open_llm_race_dataset["size"] = open_llm_race_dataset["model"].apply(
#     lambda x: MODEL_SIZES[x] if x in MODEL_SIZES else None
# )
# Demo interface

# Bar chart race settings shared by every tab.
_RACE_OPTIONS = dict(
    n_bars=10,
    fixed_max=True,
    period_length=1000,
    steps_per_period=20,
    end_period_pause=100,
    bar_texttemplate="{x:.2f}",
    filter_column_colors=True,
)


def _add_race_tab(label, model_type):
    """Add one tab showing the bar chart race for a single model type.

    Must be called inside an open ``gr.Tabs()`` context.

    Args:
        label: Text displayed on the tab.
        model_type: ``ModelType`` member; rows of ``open_llm_race_dataset``
            whose "type" column equals ``model_type.name`` are plotted.
    """
    with gr.TabItem(label=label):
        subset = open_llm_race_dataset[
            open_llm_race_dataset["type"] == model_type.name
        ]
        if subset.empty:
            # Nothing to animate for this category; show a placeholder
            # instead of handing an empty frame to bar_chart_race.
            gr.HTML("<p>No models of this type to display yet.</p>")
            return

        fig, ax = plt.subplots(figsize=(12, 6))
        ax.set_xlim(0, 100)

        # One row per date, one column per model; a model without a score on
        # a given date is shown with a score of 0.
        scores = subset.pivot(index="date", columns="model", values="score")
        scores = scores.fillna(0)

        race = bcr.bar_chart_race(scores, fig=fig, **_RACE_OPTIONS)
        # The object returned by bar_chart_race exposes its markup as `.data`.
        gr.HTML(race.data)


demo = gr.Blocks()
with demo:
    # leaderboard title
    gr.HTML("<h1>Open-LLM Race 🏃‍♂️</h1>")
    with gr.Tabs():
        _add_race_tab("Pretrained Models", ModelType.PT)
        _add_race_tab("Instruction Finetuned Models", ModelType.IFT)
        # This tab previously filtered on ModelType.IFT, duplicating the tab
        # above. NOTE(review): assumes ModelType defines an `RL` member -
        # confirm against model_types.
        _add_race_tab("RLHF Models", ModelType.RL)
        # Disabled for now:
        # _add_race_tab("Finetuned Models", ModelType.FT)
def restart_space() -> None:
    """Ask the Hugging Face Hub to restart this Space.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed when the variable is not set.
    """
    HfApi().restart_space(
        # `repo_id` must be the "namespace/name" identifier of the Space,
        # not its full URL.
        repo_id="IlyasMoutawwakil/llm-bar-race",
        token=os.environ.get("HF_TOKEN"),
    )
# Restart the Space every hour (3600 seconds) from a background scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()

# Enable the request queue, then start serving the interface.
demo.queue(concurrency_count=10)
demo.launch()