Spaces:
Running
Running
import logging | |
import os | |
import subprocess | |
import gradio as gr | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from gradio_leaderboard import Leaderboard, SelectColumns | |
from gradio_space_ci import enable_space_ci | |
from src.display.about import ( | |
INTRODUCTION_TEXT, | |
TITLE, | |
) | |
from src.display.css_html_js import custom_css | |
from src.display.utils import ( | |
AutoEvalColumn, | |
fields, | |
) | |
from src.envs import ( | |
API, | |
H4_TOKEN, | |
REPO_ID, | |
RESET_JUDGEMENT_ENV, | |
) | |
from src.leaderboard.build_leaderboard import build_leadearboard_df | |
# Set before the app is built so Gradio sees analytics as disabled.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

# Root logger: INFO and above, rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Start ephemeral Spaces on PRs (see config in README.md)
enable_space_ci()
def restart_space():
    """Ask ``API`` to restart the Space ``REPO_ID``, authenticating with ``H4_TOKEN``."""
    API.restart_space(
        token=H4_TOKEN,
        repo_id=REPO_ID,
    )
def build_demo():
    """Assemble the Gradio UI: a leaderboard tab and an answers-submission tab.

    The leaderboard data is built once, when this function is called, via
    ``build_leadearboard_df()``.

    Returns:
        The configured ``gr.Blocks`` app. It is not launched here.
    """

    def upload_file(file):
        """Upload a submitted answers file to the dataset repo and request a re-judge.

        Args:
            file: The object passed by the upload event; only its ``.name``
                (path of the uploaded file) is read.

        Returns:
            ``file.name``, so the ``gr.File`` output component can show it.
        """
        # Only the last path component is used as the file name inside the repo.
        file_name = os.path.basename(file.name)
        logging.info("New submition: file saved to %s", file_name)
        API.upload_file(
            path_or_fileobj=file.name,
            path_in_repo='./external/' + file_name,
            repo_id='Vikhrmodels/openbench-eval',
            repo_type='dataset',
        )
        # update_board() checks this flag and regenerates judgements when it is '1'.
        os.environ[RESET_JUDGEMENT_ENV] = '1'
        return file.name

    demo = gr.Blocks(title="Chatbot Arena Leaderboard", css=custom_css)
    leaderboard_df = build_leadearboard_df()
    # Evaluate the column descriptors once and reuse them for every per-column list.
    columns = list(fields(AutoEvalColumn))

    # NOTE(review): the tab labels and the submit heading look like mis-encoded
    # emoji; they are reproduced exactly as found - confirm the intended text.
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("π LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in columns],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in columns if c.displayed_by_default],
                        cant_deselect=[c.name for c in columns if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[AutoEvalColumn.model.name],
                )

            # Tab ids 1 and 2 belonged to the "About" and "FAQ" tabs, which are
            # currently disabled.
            with gr.TabItem("π Submit ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Row():
                    gr.Markdown("# β¨ Submit your model here!", elem_classes="markdown-text")

                with gr.Column():
                    # The model name is collected in the UI but is not passed to
                    # upload_file. The former `if model_name_textbox:` guard tested
                    # the component object (not the text typed into it), so it
                    # never gated anything and has been removed.
                    gr.Textbox(label="Model name")
                    file_output = gr.File()
                    upload_button = gr.UploadButton(
                        "Click to Upload & Submit Answers",
                        file_types=['*'],
                        file_count="single",
                    )
                    upload_button.upload(upload_file, upload_button, file_output)

    return demo
# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py')) | |
# print(os.system('cd src/gen/ && python show_result.py --output')) | |
def update_board():
    """Regenerate judgements and leaderboard results when a reset is pending.

    Reads the environment variable named by ``RESET_JUDGEMENT_ENV`` and returns
    immediately unless its value is ``'1'``. Otherwise the flag is set back to
    ``'0'`` and two scripts are run in order: ``src/gen/gen_judgement.py`` and
    then ``src/gen/show_result.py --output``. A non-zero exit status from
    either script is logged rather than raised.
    """
    if os.environ.get(RESET_JUDGEMENT_ENV) != '1':
        return

    # Clear the flag before starting the scripts, as the original code did.
    os.environ[RESET_JUDGEMENT_ENV] = '0'

    judgement = subprocess.run(['python', 'src/gen/gen_judgement.py'], check=False)
    if judgement.returncode != 0:
        logging.warning("gen_judgement.py exited with status %s", judgement.returncode)

    # The command must be an argument list: a single string without shell=True
    # is treated as the name of the executable and cannot be started. Using
    # run() also waits for the child instead of leaving it unreaped.
    result = subprocess.run(
        ['python3', 'src/gen/show_result.py', '--output'],
        check=False,
    )
    if result.returncode != 0:
        logging.warning("show_result.py exited with status %s", result.returncode)
if __name__ == "__main__":
    # Start with the reset flag raised so update_board() does a full
    # regeneration the first time the scheduler calls it.
    os.environ[RESET_JUDGEMENT_ENV] = '1'

    # Run update_board() from a background scheduler on a 10-minute interval.
    scheduler = BackgroundScheduler()
    scheduler.add_job(func=update_board, trigger="interval", minutes=10)
    scheduler.start()

    # Build the UI and serve it in debug mode.
    demo_app = build_demo()
    demo_app.launch(debug=True)