"""Gradio app that shows Open-LLM leaderboard scores as bar chart races.

The script reads ``open_llm_race_dataset.csv``, keeps one score per model and
day, tags every model with its type from ``model_types`` and renders one bar
chart race per model type inside a tabbed Gradio interface. A background job
restarts the hosting Space every hour.
"""
import os

import bar_chart_race as bcr
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from model_types import MODEL_TYPES, ModelType

# MODEL_SIZES = pd.read_pickle(
#     "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/resolve/main/model_size_cache.pkl"
# )

# Rows dated before this day (ISO format) are left out of the race.
RACE_START_DATE = "2023-07-10"

# Hub identifier ("namespace/name") of the Space that gets restarted.
SPACE_REPO_ID = "IlyasMoutawwakil/llm-bar-race"

# read in the data
open_llm_race_dataset = pd.read_csv("open_llm_race_dataset.csv")

# resample every model to a daily frequency, keeping the last score of each day.
# Only the "score" column is resampled so the result does not depend on how the
# installed pandas treats the grouping column; "model" and "date" come back
# from the index via reset_index().
# NOTE(review): the original call also passed the legacy `how="last"` and
# `fill_method="ffill"` keywords. Aggregation is done by `.last()`; no
# forward-fill is applied here, and days without a score are removed by the
# dropna() below - confirm whether a forward-fill was intended.
open_llm_race_dataset["date"] = pd.to_datetime(open_llm_race_dataset["date"])
open_llm_race_dataset = (
    open_llm_race_dataset.set_index("date")
    .groupby("model")["score"]
    .resample("D", closed="right")
    .last()
    .reset_index()
)

# filter on the start date, drop nan values, drop duplicates on model and date.
# ISO-formatted date strings compare in chronological order.
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")
open_llm_race_dataset = (
    open_llm_race_dataset.loc[
        open_llm_race_dataset["date"] >= RACE_START_DATE,
        ["date", "score", "model"],
    ]
    .dropna()
    .drop_duplicates(subset=["model", "date"])
)

# add the model type
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].apply(
    lambda name: (
        MODEL_TYPES[name].name if name in MODEL_TYPES else ModelType.Unknown.name
    )
)

# # add the model size
# open_llm_race_dataset["size"] = open_llm_race_dataset["model"].apply(
#     lambda x: MODEL_SIZES[x] if x in MODEL_SIZES else None
# )


def _render_race(dataset, model_type):
    """Render the bar chart race for one model type into the current tab.

    Must be called inside an active Gradio layout context, because it creates
    a ``gr.HTML`` component.

    Args:
        dataset: Long-format frame with "date", "score", "model" and "type"
            columns.
        model_type: ``ModelType`` member whose ``name`` is matched against the
            "type" column.

    Returns:
        A ``(scores, race)`` tuple: the date-by-model score table and the
        object returned by ``bcr.bar_chart_race``. ``race`` is ``None`` when
        no model of that type is present.
    """
    selected = dataset[dataset["type"] == model_type.name]
    # Models without a score on a given date are drawn as zero-length bars.
    scores = selected.pivot(index="date", columns="model", values="score").fillna(0)

    if scores.empty:
        # Nothing to animate: show a notice rather than failing at start-up.
        gr.HTML("<p>No data available for this model type.</p>")
        return scores, None

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_xlim(0, 100)
    race = bcr.bar_chart_race(
        scores,
        n_bars=10,
        fixed_max=True,
        period_length=1000,
        steps_per_period=20,
        end_period_pause=100,
        bar_texttemplate="{x:.2f}",
        filter_column_colors=True,
        fig=fig,
    )
    gr.HTML(race.data)
    return scores, race


# Demo interface
demo = gr.Blocks()
with demo:
    # leaderboard title
    # NOTE(review): the original markup of this title was lost; it is rebuilt
    # here as a plain heading - restore any styling attributes if needed.
    gr.HTML("<h1>Open-LLM Race 🏃‍♂️</h1>")

    with gr.Tabs():
        with gr.TabItem(label="Pretrained Models"):
            pretrained_dataset, pretrained_fig = _render_race(
                open_llm_race_dataset, ModelType.PT
            )

        with gr.TabItem(label="Instructions Finetuned Models"):
            inst_finetuned_dataset, inst_finetuned_fig = _render_race(
                open_llm_race_dataset, ModelType.IFT
            )

        with gr.TabItem(label="RLHF Models"):
            # This tab previously filtered on ModelType.IFT (copy-paste slip).
            # Assumes model_types.ModelType defines an RL member - TODO confirm.
            rl_dataset, rl_fig = _render_race(open_llm_race_dataset, ModelType.RL)

        # Disabled tab for fine-tuned models:
        # with gr.TabItem(label="Finetuned Models"):
        #     finetuned_dataset, finetuned_fig = _render_race(
        #         open_llm_race_dataset, ModelType.FT
        #     )


def restart_space():
    """Ask the Hugging Face Hub to restart the Space that hosts this app.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed when the variable is not set.
    """
    HfApi().restart_space(
        repo_id=SPACE_REPO_ID,
        token=os.environ.get("HF_TOKEN", None),
    )


# Restart space every hour
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=restart_space,
    trigger="interval",
    seconds=3600,
)
scheduler.start()

demo.queue(concurrency_count=10).launch()