# kaz-llm-lb / app.py
# apsys's picture
# df fixes
# 55fc7f4
# raw
# history blame
# 10.5 kB
import logging
import os
# Create the local scratch directory (no error if it already exists) and
# publish its path through the TMP_DIR environment variable. This runs before
# the remaining imports below.
# NOTE(review): presumably one of the modules imported afterwards reads
# TMP_DIR at import time, which would explain the placement -- confirm.
os.makedirs("tmp", exist_ok=True)
os.environ['TMP_DIR'] = "tmp"
import subprocess
import shutil
import glob
import gradio as gr
import numpy as np
from src.radial.radial import create_plot
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns
from gradio_space_ci import enable_space_ci
import json
from io import BytesIO
def handle_file_upload(file):
    """Parse an uploaded JSON results file.

    Args:
        file: The uploaded file as handed over by the upload event. Either an
            object exposing the on-disk path as ``.name`` or a plain path
            string is accepted.

    Returns:
        A ``(payload, file_name)`` tuple: the decoded JSON document and the
        base name of the uploaded file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Support both wrappers carrying the path in ``.name`` and bare strings.
    source_path = getattr(file, "name", file)
    file_path = os.path.basename(source_path)
    logging.info("File uploaded: %s", file_path)
    # JSON is UTF-8 by specification; do not rely on the platform locale.
    with open(source_path, "r", encoding="utf-8") as f:
        v = json.load(f)
    return v, file_path
def submit_file(v, file_path, mn, profile: gr.OAuthProfile | None):
    """Publish an uploaded evaluation result to the external-results dataset.

    The per-task accuracy (``"acc,none"``) is flattened into top-level
    fields, the submitter/model identity is attached, and the resulting JSON
    document is uploaded to ``Vikhrmodels/s-openbench-eval``. On success the
    ``RESET_JUDGEMENT_ENV`` flag is set to ``"1"``.

    Args:
        v: Parsed JSON document of the uploaded file; must contain
            ``results`` and ``config`` mappings.
        file_path: Base name of the uploaded file (not used here).
        mn: Model name entered by the user.
        profile: OAuth profile of the logged-in user, or ``None`` when the
            user is not logged in.

    Returns:
        A human-readable status string for the UI.
    """
    if profile is None:
        return "Hub Login Required"
    if v is None:
        return "Please upload a results file first"
    if not mn:
        return "Model name is required"

    try:
        # Work on a copy: ``v`` is kept in the UI state between clicks, so
        # editing it in place would make a repeated submit of the same upload
        # fail on the keys removed below.
        new_file = dict(v['results'])
        new_file['model'] = profile.username + "/" + mn
        new_file['moviesmc'] = new_file['moviemc']["acc,none"]
        new_file['musicmc'] = new_file['musicmc']["acc,none"]
        new_file['booksmc'] = new_file['bookmc']["acc,none"]
        new_file['mmluproru'] = new_file['mmluproru']["acc,none"]
        new_file['lawmc'] = new_file['lawmc']["acc,none"]
        new_file['model_dtype'] = v['config']["model_dtype"]
        new_file['ppl'] = 0
        # The raw entries were re-published under the pluralised names above.
        new_file.pop('moviemc')
        new_file.pop('bookmc')
    except (KeyError, TypeError) as err:
        logging.warning("Rejected malformed submission %s: %r", file_path, err)
        return f"Invalid results file: missing or malformed field ({err!r})"

    # Build the buffer from the encoded bytes so its read position starts at
    # 0; writing into an empty BytesIO would leave the cursor at the end.
    payload = json.dumps(new_file).encode('utf-8')
    API.upload_file(
        path_or_fileobj=BytesIO(payload),
        path_in_repo="model_data/external/" + profile.username+mn + ".json",
        repo_id="Vikhrmodels/s-openbench-eval",
        repo_type="dataset",
    )
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    return "Success!"
from src.display.about import (
INTRODUCTION_TEXT,
TITLE,
LLM_BENCHMARKS_TEXT
)
from src.display.css_html_js import custom_css
from src.display.utils import (
AutoEvalColumn,
fields,
)
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
import huggingface_hub
# huggingface_hub.login(token=H4_TOKEN)
# Set Gradio's analytics switch to "false" for this process.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
# Configure logging: INFO level, "<timestamp> - <level> - <message>" records.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Start ephemeral Spaces on PRs (see config in README.md)
enable_space_ci()
# download_openbench()
def restart_space():
    """Request a restart of the Space ``REPO_ID``, then call ``download_openbench``."""
    API.restart_space(repo_id=REPO_ID)
    download_openbench()
def update_plot(selected_models):
    """Return whatever ``create_plot`` builds for the given model selection."""
    figure = create_plot(selected_models)
    return figure
def _leaderboard_tab(leaderboard_df):
    """Add the benchmark table tab to the currently open ``gr.Tabs``."""
    column_types = [col.type for col in fields(AutoEvalColumn)]
    shown_by_default = [col.name for col in fields(AutoEvalColumn) if col.displayed_by_default]
    always_shown = [col.name for col in fields(AutoEvalColumn) if col.never_hidden or col.dummy]
    column_picker = SelectColumns(
        default_selection=shown_by_default,
        cant_deselect=always_shown,
        label="Select Columns to Display:",
    )
    with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
        Leaderboard(
            value=leaderboard_df,
            datatype=column_types,
            select_columns=column_picker,
            # Only the model name is searchable.
            search_columns=[AutoEvalColumn.model.name],
        )


def _submit_tab():
    """Add the submission tab: model name, JSON upload, login and submit."""
    with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
        with gr.Row():
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        with gr.Row():
            gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name")
            file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
            # State slots carrying the parsed upload from the upload event to
            # the submit click.
            uploaded_file = gr.State()
            file_path = gr.State()
            with gr.Row():
                with gr.Column():
                    out = gr.Textbox("Статус отправки")
                with gr.Column():
                    gr.LoginButton(elem_id="oauth-button")
            submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')

            # Step 1: parse the file as soon as it is uploaded.
            file_output.upload(
                handle_file_upload,
                file_output,
                [uploaded_file, file_path],
            )
            # Step 2: publish the parsed result on click; status goes to `out`.
            submit_button.click(
                submit_file,
                [uploaded_file, file_path, model_name_textbox],
                [out],
            )


def _analytics_tab(leaderboard_df):
    """Add the analytics tab: a model multi-select driving the plot."""
    all_models = leaderboard_df["model"].tolist()
    with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
        with gr.Column():
            model_dropdown = gr.Dropdown(
                choices=all_models,
                label="Models",
                # Every model is pre-selected.
                value=list(all_models),
                multiselect=True,
                info="Select models",
            )
        with gr.Column():
            plot = gr.Plot(update_plot(model_dropdown.value))
            model_dropdown.change(
                fn=update_plot,
                inputs=[model_dropdown],
                outputs=[plot],
            )


def build_demo():
    """Assemble the Gradio app: leaderboard, submission and analytics tabs.

    Calls ``download_openbench()`` first, then builds the leaderboard
    dataframe with ``build_leadearboard_df()`` and lays out the UI.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    download_openbench()
    demo = gr.Blocks(title="Small Shlepa", css=custom_css)
    leaderboard_df = build_leadearboard_df()

    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tab-buttons"):
            _leaderboard_tab(leaderboard_df)
            # The "About" (id=1) and "FAQ" (id=2) tabs are currently disabled.
            _submit_tab()
            _analytics_tab(leaderboard_df)

    return demo
# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
# print(os.system('cd src/gen/ && python show_result.py --output'))
def update_board():
    """Rebuild the aggregated leaderboard file and publish it.

    Steps:
      1. Log and clear the ``RESET_JUDGEMENT_ENV`` flag.
      2. Remove the local ``./m_data`` and ``./data`` directories and download
         ``Vikhrmodels/s-openbench-eval`` into ``m_data``.
      3. Collect every readable ``model_data/external/*.json`` submission
         (falling back to a single placeholder row when there is none),
         write the list to ``genned.json`` and upload it as
         ``leaderboard.json`` to ``Vikhrmodels/s-shlepa-metainfo``.
      4. Call ``restart_space()``.
    """
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement: %s", need_reset)
    # NOTE: the early return for ``need_reset != "1"`` is deliberately
    # disabled, so the board is rebuilt on every call regardless of the flag.
    os.environ[RESET_JUDGEMENT_ENV] = "0"

    # Start from a clean slate. A directory that does not exist yet (e.g. on
    # a fresh checkout) is fine and must not abort the update.
    for stale_dir in ("./m_data", "./data"):
        try:
            shutil.rmtree(stale_dir)
        except FileNotFoundError:
            logging.info("Nothing to remove at %s", stale_dir)
    download_dataset("Vikhrmodels/s-openbench-eval", "m_data")

    # Sorted for a stable ordering of the published file between runs.
    data_list = []
    for file in sorted(glob.glob("./m_data/model_data/external/*.json")):
        try:
            with open(file, encoding="utf-8") as f:
                data_list.append(json.load(f))
        except (OSError, ValueError, RecursionError) as err:
            # Best effort: one unreadable or badly formatted submission must
            # not block the others, but leave a trace of what was skipped.
            logging.warning("Skipping unreadable result file %s: %s", file, err)

    if not data_list:
        # Placeholder row used only when there are no submissions at all.
        data_list = [{"musicmc": 0.3021276595744681, "lawmc": 0.2800829875518672, "model": "apsys/saiga_3_8b", "moviesmc": 0.3472222222222222, "booksmc": 0.2800829875518672, "model_dtype": "torch.float16", "ppl": 0, 'mmluproru': 0}]
    logging.info("DATALIST, %s", data_list)

    with open("genned.json", "w", encoding="utf-8") as f:
        json.dump(data_list, f)
    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="Vikhrmodels/s-shlepa-metainfo",
        repo_type="dataset",
    )
    # NOTE(review): this runs unconditionally, including when update_board()
    # is called from the ``__main__`` start-up path -- confirm that restarting
    # the Space here cannot cause a restart loop.
    restart_space()
    # gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
    # subprocess.run(["python3", gen_judgement_file], check=True)
if __name__ == "__main__":
    # Mark the board as needing a rebuild before the first update.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    scheduler = BackgroundScheduler()

    # One synchronous refresh at start-up.
    update_board()

    # The periodic refresh job is currently disabled, so the scheduler is
    # started without any jobs registered.
    # scheduler.add_job(update_board, "interval", minutes=1)
    scheduler.start()

    demo_app = build_demo()
    demo_app.launch(
        debug=True,
        share=True,
    )