kaz-llm-lb

Running

App Files Files Community

kaz-llm-lb / app.py

hi-melnikov

Moved build leaderboard to different folder

d317f64 7 months ago

raw

history blame

4.43 kB

	import logging
	import os
	import subprocess

	import gradio as gr
	from apscheduler.schedulers.background import BackgroundScheduler
	from gradio_leaderboard import Leaderboard, SelectColumns
	from gradio_space_ci import enable_space_ci

	from src.display.about import (
	INTRODUCTION_TEXT,
	TITLE,
	)
	from src.display.css_html_js import custom_css
	from src.display.utils import (
	AutoEvalColumn,
	fields,
	)
	from src.envs import (
	API,
	H4_TOKEN,
	REPO_ID,
	RESET_JUDGEMENT_ENV,
	)
	from src.leaderboard.build_leaderboard import build_leadearboard_df

	# Switch Gradio analytics off through its environment variable.
	os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

	# Configure logging: INFO and above, prefixed with timestamp and level name.
	logging.basicConfig(
	    format="%(asctime)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)

	# Start ephemeral Spaces on PRs (see config in README.md)
	enable_space_ci()

	def restart_space():
	    """Restart the Space identified by ``REPO_ID``.

	    Delegates to ``API.restart_space``, passing ``H4_TOKEN`` as the token.
	    """
	    API.restart_space(
	        token=H4_TOKEN,
	        repo_id=REPO_ID,
	    )


	def build_demo():
	    """Assemble the Gradio Blocks UI for the leaderboard.

	    The app contains two tabs: one rendering the leaderboard table built by
	    ``build_leadearboard_df()``, and one where an answers file can be
	    uploaded for evaluation.

	    Returns:
	        The constructed ``gr.Blocks`` instance; it is not launched here.
	    """
	    demo = gr.Blocks(title="Chatbot Arena Leaderboard", css=custom_css)
	    leaderboard_df = build_leadearboard_df()
	    # Materialise the column descriptors once; they are consulted three times.
	    columns = list(fields(AutoEvalColumn))

	    with demo:
	        gr.HTML(TITLE)
	        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	        with gr.Tabs(elem_classes="tab-buttons"):
	            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
	                Leaderboard(
	                    value=leaderboard_df,
	                    datatype=[c.type for c in columns],
	                    select_columns=SelectColumns(
	                        default_selection=[c.name for c in columns if c.displayed_by_default],
	                        cant_deselect=[c.name for c in columns if c.never_hidden or c.dummy],
	                        label="Select Columns to Display:",
	                    ),
	                    search_columns=[
	                        AutoEvalColumn.model.name,
	                        # AutoEvalColumn.fullname.name,
	                        # AutoEvalColumn.license.name
	                    ],
	                )

	            #with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
	            # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
	            #with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
	            # gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")

	            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):

	                with gr.Row():
	                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")

	                with gr.Column():
	                    model_name_textbox = gr.Textbox(label="Model name")

	                    def upload_file(file):
	                        """Push an uploaded file to the eval dataset and request re-judgement.

	                        Args:
	                            file: Object handed over by the upload button; only
	                                its ``name`` attribute (a path) is read here.

	                        Returns:
	                            ``file.name`` unchanged, which is wired to the
	                            ``gr.File`` output component.
	                        """
	                        # Keep only the final path component for the repo path.
	                        file_path = os.path.basename(file.name)
	                        logging.info("New submition: file saved to %s", file_path)
	                        API.upload_file(
	                            path_or_fileobj=file.name,
	                            path_in_repo='./external/' + file_path,
	                            repo_id='Vikhrmodels/openbench-eval',
	                            repo_type='dataset',
	                        )
	                        # Raise the flag that update_board() checks before re-running judgement.
	                        os.environ[RESET_JUDGEMENT_ENV] = '1'
	                        return file.name

	                    # NOTE(review): this tests the component object, not the text
	                    # typed into it, so it is presumably always true; the model
	                    # name is also never passed to upload_file — confirm intent.
	                    if model_name_textbox:
	                        file_output = gr.File()
	                        upload_button = gr.UploadButton(
	                            "Click to Upload & Submit Answers",
	                            file_types=['*'],
	                            file_count="single",
	                        )
	                        upload_button.upload(upload_file, upload_button, file_output)

	    return demo

	# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
	# print(os.system('cd src/gen/ && python show_result.py --output'))

	def update_board():
	    """Re-run judgement and result generation when a reset has been requested.

	    Returns immediately unless the environment variable named by
	    ``RESET_JUDGEMENT_ENV`` is ``'1'``. Otherwise the flag is set back to
	    ``'0'`` and the two generation scripts are started.
	    """
	    if os.environ.get(RESET_JUDGEMENT_ENV) != '1':
	        return

	    # Clear the flag before the work starts, so a request raised while the
	    # scripts are running is still seen by the next scheduled call.
	    os.environ[RESET_JUDGEMENT_ENV] = '0'

	    # Waits for the judgement script to finish; a non-zero exit status is
	    # tolerated (check=False).
	    subprocess.run(['python', 'src/gen/gen_judgement.py'], check=False)

	    # Pass an argument list: without shell=True a single string is taken as
	    # the executable name, so the previous one-string form could never start.
	    # Started without waiting for completion, as before.
	    subprocess.Popen(['python3', 'src/gen/show_result.py', '--output'])


	if __name__ == "__main__":
	    # Raise the reset flag so the first scheduled update_board() call does work.
	    os.environ[RESET_JUDGEMENT_ENV] = "1"

	    # Run update_board() in the background every ten minutes.
	    scheduler = BackgroundScheduler()
	    scheduler.add_job(update_board, trigger="interval", minutes=10)
	    scheduler.start()

	    # Build the UI and serve it.
	    demo_app = build_demo()
	    demo_app.launch(debug=True)