Spaces:
Running
Running
df fixes
Browse files
- app.py +6 -7
- data/leaderboard.json +1 -42
- genned.json +1 -0
- m_data/generate_leaderboard.py +0 -29
- src/leaderboard/build_leaderboard.py +3 -1
app.py
CHANGED
@@ -4,6 +4,7 @@ os.makedirs("tmp", exist_ok=True)
|
|
4 |
os.environ['TMP_DIR'] = "tmp"
|
5 |
import subprocess
|
6 |
import shutil
|
|
|
7 |
import gradio as gr
|
8 |
import numpy as np
|
9 |
from src.radial.radial import create_plot
|
@@ -81,7 +82,6 @@ def build_demo():
|
|
81 |
download_openbench()
|
82 |
demo = gr.Blocks(title="Small Shlepa", css=custom_css)
|
83 |
leaderboard_df = build_leadearboard_df()
|
84 |
-
default_model = leaderboard_df["model"].iloc[0]
|
85 |
with demo:
|
86 |
gr.HTML(TITLE)
|
87 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
@@ -115,7 +115,7 @@ def build_demo():
|
|
115 |
gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
|
116 |
|
117 |
with gr.Column():
|
118 |
-
|
119 |
# def upload_file(file,su,mn):
|
120 |
# file_path = file.name.split("/")[-1] if "/" in file.name else file.name
|
121 |
# logging.info("New submition: file saved to %s", file_path)
|
@@ -175,8 +175,8 @@ def build_demo():
|
|
175 |
with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
|
176 |
with gr.Column():
|
177 |
model_dropdown = gr.Dropdown(
|
178 |
-
choices=leaderboard_df["model"].tolist(),
|
179 |
-
label="Models",
|
180 |
value=leaderboard_df["model"].tolist(),
|
181 |
multiselect=True,
|
182 |
info="Select models"
|
@@ -211,7 +211,6 @@ def update_board():
|
|
211 |
shutil.rmtree("./m_data")
|
212 |
shutil.rmtree("./data")
|
213 |
download_dataset("Vikhrmodels/s-openbench-eval", "m_data")
|
214 |
-
import glob
|
215 |
data_list = [{"musicmc": 0.3021276595744681, "lawmc": 0.2800829875518672, "model": "apsys/saiga_3_8b", "moviesmc": 0.3472222222222222, "booksmc": 0.2800829875518672, "model_dtype": "torch.float16", "ppl": 0, 'mmluproru':0}]
|
216 |
for file in glob.glob("./m_data/model_data/external/*.json"):
|
217 |
with open(file) as f:
|
@@ -244,8 +243,8 @@ if __name__ == "__main__":
|
|
244 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|
245 |
|
246 |
scheduler = BackgroundScheduler()
|
247 |
-
|
248 |
-
scheduler.add_job(update_board, "interval", minutes=1)
|
249 |
scheduler.start()
|
250 |
|
251 |
demo_app = build_demo()
|
|
|
4 |
os.environ['TMP_DIR'] = "tmp"
|
5 |
import subprocess
|
6 |
import shutil
|
7 |
+
import glob
|
8 |
import gradio as gr
|
9 |
import numpy as np
|
10 |
from src.radial.radial import create_plot
|
|
|
82 |
download_openbench()
|
83 |
demo = gr.Blocks(title="Small Shlepa", css=custom_css)
|
84 |
leaderboard_df = build_leadearboard_df()
|
|
|
85 |
with demo:
|
86 |
gr.HTML(TITLE)
|
87 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
|
|
115 |
gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
|
116 |
|
117 |
with gr.Column():
|
118 |
+
|
119 |
# def upload_file(file,su,mn):
|
120 |
# file_path = file.name.split("/")[-1] if "/" in file.name else file.name
|
121 |
# logging.info("New submition: file saved to %s", file_path)
|
|
|
175 |
with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
|
176 |
with gr.Column():
|
177 |
model_dropdown = gr.Dropdown(
|
178 |
+
choices=leaderboard_df["model"].tolist(),
|
179 |
+
label="Models",
|
180 |
value=leaderboard_df["model"].tolist(),
|
181 |
multiselect=True,
|
182 |
info="Select models"
|
|
|
211 |
shutil.rmtree("./m_data")
|
212 |
shutil.rmtree("./data")
|
213 |
download_dataset("Vikhrmodels/s-openbench-eval", "m_data")
|
|
|
214 |
data_list = [{"musicmc": 0.3021276595744681, "lawmc": 0.2800829875518672, "model": "apsys/saiga_3_8b", "moviesmc": 0.3472222222222222, "booksmc": 0.2800829875518672, "model_dtype": "torch.float16", "ppl": 0, 'mmluproru':0}]
|
215 |
for file in glob.glob("./m_data/model_data/external/*.json"):
|
216 |
with open(file) as f:
|
|
|
243 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|
244 |
|
245 |
scheduler = BackgroundScheduler()
|
246 |
+
update_board()
|
247 |
+
# scheduler.add_job(update_board, "interval", minutes=1)
|
248 |
scheduler.start()
|
249 |
|
250 |
demo_app = build_demo()
|
data/leaderboard.json
CHANGED
@@ -1,42 +1 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"musicmc": 0.09361702127659574,
|
4 |
-
"mmluproru": 0.10207253886010363,
|
5 |
-
"lawmc": 0.11431513903192585,
|
6 |
-
"model": "NousResearch/Llama-2-7b-hf",
|
7 |
-
"moviesmc": 0.07175925925925926,
|
8 |
-
"booksmc": 0.1078838174273859,
|
9 |
-
"model_dtype": "torch.float16",
|
10 |
-
"ppl": 0
|
11 |
-
},
|
12 |
-
{
|
13 |
-
"musicmc": 0.2553191489361702,
|
14 |
-
"mmluproru": 0.2621761658031088,
|
15 |
-
"lawmc": 0.5818743563336766,
|
16 |
-
"model": "google/gemma-2-9b",
|
17 |
-
"moviesmc": 0.5046296296296297,
|
18 |
-
"booksmc": 0.3360995850622407,
|
19 |
-
"model_dtype": "torch.float16",
|
20 |
-
"ppl": 0
|
21 |
-
},
|
22 |
-
{
|
23 |
-
"musicmc": 0.2680851063829787,
|
24 |
-
"mmluproru": 0.20103626943005182,
|
25 |
-
"lawmc": 0.5386199794026777,
|
26 |
-
"model": "Vikhrmodels/it-5.2-fp16-cp",
|
27 |
-
"moviesmc": 0.4537037037037037,
|
28 |
-
"booksmc": 0.3070539419087137,
|
29 |
-
"model_dtype": "torch.float16",
|
30 |
-
"ppl": 0
|
31 |
-
},
|
32 |
-
{
|
33 |
-
"musicmc": 0.25957446808510637,
|
34 |
-
"mmluproru": 0.19378238341968912,
|
35 |
-
"lawmc": 0.518022657054583,
|
36 |
-
"model": "lightblue/suzume-llama-3-8B-multilingual",
|
37 |
-
"moviesmc": 0.3287037037037037,
|
38 |
-
"booksmc": 0.2966804979253112,
|
39 |
-
"model_dtype": "torch.float16",
|
40 |
-
"ppl": 0
|
41 |
-
}
|
42 |
-
]
|
|
|
1 |
+
[{"musicmc": 0.2936170212765957, "lawmc": 0.5345005149330587, "model": "RefalMachine/llama3 ushanka", "moviesmc": 0.35185185185185186, "booksmc": 0.3257261410788382, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.251063829787234, "lawmc": 0.48712667353244077, "model": "apsys/vikhr-52-7b", "moviesmc": 0.4212962962962963, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.09361702127659574, "mmluproru": 0.10207253886010363, "lawmc": 0.11431513903192585, "model": "NousResearch/Llama-2-7b-hf", "moviesmc": 0.07175925925925926, "booksmc": 0.1078838174273859, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2553191489361702, "mmluproru": 0.2621761658031088, "lawmc": 0.5818743563336766, "model": "google/gemma-2-9b", "moviesmc": 0.5046296296296297, "booksmc": 0.3360995850622407, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.20851063829787234, "lawmc": 0.42636457260556127, "model": "cohere/aya-8b", "moviesmc": 0.3287037037037037, "booksmc": 0.24273858921161826, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2936170212765957, "lawmc": 0.48094747682801237, "model": "apsys/saiga_3_8b", "moviesmc": 0.3402777777777778, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.3021276595744681, "lawmc": 0.544799176107106, "model": "alexwortega/saiga_submit", "moviesmc": 0.3958333333333333, "booksmc": 0.3381742738589212, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.28297872340425534, "lawmc": 0.5406797116374872, "model": "microsoft/Phi-3-medium-4k-instruct", "moviesmc": 0.42824074074074076, "booksmc": 0.3817427385892116, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.28085106382978725, "mmluproru": 0.17979274611398963, "lawmc": 0.5324407826982492, "model": "apsys/tlite-it-0.1", "moviesmc": 0.4699074074074074, "booksmc": 0.3360995850622407, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2680851063829787, "mmluproru": 0.20103626943005182, 
"lawmc": 0.5386199794026777, "model": "Vikhrmodels/it-5.2-fp16-cp", "moviesmc": 0.4537037037037037, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2723404255319149, "lawmc": 0.4850669412976313, "model": "Nexusflow/Starling-LM-7B-beta", "moviesmc": 0.38657407407407407, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.20851063829787234, "lawmc": 0.47167868177136973, "model": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", "moviesmc": 0.3055555555555556, "booksmc": 0.26141078838174275, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.25957446808510637, "mmluproru": 0.19378238341968912, "lawmc": 0.518022657054583, "model": "lightblue/suzume-llama-3-8B-multilingual", "moviesmc": 0.3287037037037037, "booksmc": 0.2966804979253112, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2872340425531915, "lawmc": 0.5066941297631308, "model": "vikhr-52-7b-chat-hf/apsys", "moviesmc": 0.4837962962962963, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.3021276595744681, "lawmc": 0.544799176107106, "model": "IlyaGusev/saiga_llama3_8b", "moviesmc": 0.3958333333333333, "booksmc": 0.3381742738589212, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.24468085106382978, "lawmc": 0.4788877445932029, "model": "apsys/vikhr-53-7b-32k", "moviesmc": 0.4050925925925926, "booksmc": 0.3049792531120332, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.28085106382978725, "mmluproru": 0.17979274611398963, "lawmc": 0.5324407826982492, "model": "apsys/T-lite-instruct-0.1", "moviesmc": 0.4699074074074074, "booksmc": 0.3360995850622407, "model_dtype": "torch.float16", "ppl": 0}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
genned.json
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"musicmc": 0.2936170212765957, "lawmc": 0.5345005149330587, "model": "RefalMachine/llama3 ushanka", "moviesmc": 0.35185185185185186, "booksmc": 0.3257261410788382, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.251063829787234, "lawmc": 0.48712667353244077, "model": "apsys/vikhr-52-7b", "moviesmc": 0.4212962962962963, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.09361702127659574, "mmluproru": 0.10207253886010363, "lawmc": 0.11431513903192585, "model": "NousResearch/Llama-2-7b-hf", "moviesmc": 0.07175925925925926, "booksmc": 0.1078838174273859, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2553191489361702, "mmluproru": 0.2621761658031088, "lawmc": 0.5818743563336766, "model": "google/gemma-2-9b", "moviesmc": 0.5046296296296297, "booksmc": 0.3360995850622407, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.20851063829787234, "lawmc": 0.42636457260556127, "model": "cohere/aya-8b", "moviesmc": 0.3287037037037037, "booksmc": 0.24273858921161826, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2936170212765957, "lawmc": 0.48094747682801237, "model": "apsys/saiga_3_8b", "moviesmc": 0.3402777777777778, "booksmc": 0.3112033195020747, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.3021276595744681, "lawmc": 0.544799176107106, "model": "alexwortega/saiga_submit", "moviesmc": 0.3958333333333333, "booksmc": 0.3381742738589212, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.28297872340425534, "lawmc": 0.5406797116374872, "model": "microsoft/Phi-3-medium-4k-instruct", "moviesmc": 0.42824074074074076, "booksmc": 0.3817427385892116, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.28085106382978725, "mmluproru": 0.17979274611398963, "lawmc": 0.5324407826982492, "model": "apsys/tlite-it-0.1", "moviesmc": 0.4699074074074074, "booksmc": 0.3360995850622407, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2680851063829787, "mmluproru": 0.20103626943005182, 
"lawmc": 0.5386199794026777, "model": "Vikhrmodels/it-5.2-fp16-cp", "moviesmc": 0.4537037037037037, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2723404255319149, "lawmc": 0.4850669412976313, "model": "Nexusflow/Starling-LM-7B-beta", "moviesmc": 0.38657407407407407, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.20851063829787234, "lawmc": 0.47167868177136973, "model": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", "moviesmc": 0.3055555555555556, "booksmc": 0.26141078838174275, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.25957446808510637, "mmluproru": 0.19378238341968912, "lawmc": 0.518022657054583, "model": "lightblue/suzume-llama-3-8B-multilingual", "moviesmc": 0.3287037037037037, "booksmc": 0.2966804979253112, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.2872340425531915, "lawmc": 0.5066941297631308, "model": "vikhr-52-7b-chat-hf/apsys", "moviesmc": 0.4837962962962963, "booksmc": 0.3070539419087137, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.3021276595744681, "lawmc": 0.544799176107106, "model": "IlyaGusev/saiga_llama3_8b", "moviesmc": 0.3958333333333333, "booksmc": 0.3381742738589212, "model_dtype": "torch.bfloat16", "ppl": 0}, {"musicmc": 0.24468085106382978, "lawmc": 0.4788877445932029, "model": "apsys/vikhr-53-7b-32k", "moviesmc": 0.4050925925925926, "booksmc": 0.3049792531120332, "model_dtype": "torch.float16", "ppl": 0}, {"musicmc": 0.28085106382978725, "mmluproru": 0.17979274611398963, "lawmc": 0.5324407826982492, "model": "apsys/T-lite-instruct-0.1", "moviesmc": 0.4699074074074074, "booksmc": 0.3360995850622407, "model_dtype": "torch.float16", "ppl": 0}]
|
m_data/generate_leaderboard.py
DELETED
@@ -1,29 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import json
|
3 |
-
|
4 |
-
def merge_json_files(folder_path, output_file):
|
5 |
-
merged_data = []
|
6 |
-
if not os.path.exists(folder_path):
|
7 |
-
print(f"Папка '{folder_path}' не существует.")
|
8 |
-
return
|
9 |
-
|
10 |
-
for filename in os.listdir(folder_path):
|
11 |
-
if filename.endswith('.json'):
|
12 |
-
file_path = os.path.join(folder_path, filename)
|
13 |
-
try:
|
14 |
-
with open(file_path, 'r', encoding='utf-8') as f:
|
15 |
-
data = json.load(f)
|
16 |
-
merged_data.append(data)
|
17 |
-
except Exception as e:
|
18 |
-
print(f"Ошибка при чтении файла '{file_path}': {e}")
|
19 |
-
|
20 |
-
with open(output_file, 'w', encoding='utf-8') as f:
|
21 |
-
json.dump(merged_data, f, ensure_ascii=False, indent=4)
|
22 |
-
|
23 |
-
print(f"Данные успешно объединены и сохранены в файл '{output_file}'.")
|
24 |
-
|
25 |
-
if __name__ == "__main__":
|
26 |
-
folder_path = './model_data/external'
|
27 |
-
output_file = 'leaderboard.json'
|
28 |
-
|
29 |
-
merge_json_files(folder_path, output_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/leaderboard/build_leaderboard.py
CHANGED
@@ -74,7 +74,9 @@ def build_leadearboard_df():
|
|
74 |
leaderboard_df = df[['model','mmluproru','moviesmc','musicmc','lawmc','booksmc','model_dtype','ppl']]
|
75 |
leaderboard_df['avg'] = leaderboard_df[['moviesmc','musicmc','lawmc','booksmc','mmluproru']].mean(axis=1).values
|
76 |
# print(leaderboard_df.columns)
|
77 |
-
|
|
|
|
|
78 |
leaderboard_df.sort_values(by='avg',ascending=False,inplace=True,axis=0)
|
79 |
numeric_cols = leaderboard_df.select_dtypes(include=['number']).columns
|
80 |
# print(numeric_cols)
|
|
|
74 |
leaderboard_df = df[['model','mmluproru','moviesmc','musicmc','lawmc','booksmc','model_dtype','ppl']]
|
75 |
leaderboard_df['avg'] = leaderboard_df[['moviesmc','musicmc','lawmc','booksmc','mmluproru']].mean(axis=1).values
|
76 |
# print(leaderboard_df.columns)
|
77 |
+
if len(leaderboard_df)>3:
|
78 |
+
leaderboard_df = leaderboard_df[leaderboard_df['mmluproru']!=0]
|
79 |
+
logging.info("Leaderboard DataFrame shape:", leaderboard_df)
|
80 |
leaderboard_df.sort_values(by='avg',ascending=False,inplace=True,axis=0)
|
81 |
numeric_cols = leaderboard_df.select_dtypes(include=['number']).columns
|
82 |
# print(numeric_cols)
|