hi-melnikov committed
Commit 9358ac6 • 1 parent: 8e67ebe
Removing model download to check GUI

Files changed:
- app.py: +7 −13
- src/scripts/update_all_request_files.py: +2 −5
- update_dynamic.py: +2 −0
app.py
CHANGED
@@ -7,7 +7,6 @@ import time
 import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-from gradio_leaderboard import Leaderboard, SelectColumns
 from gradio_space_ci import enable_space_ci
 from huggingface_hub import snapshot_download
 
@@ -18,11 +17,6 @@ from src.display.about import (
     TITLE,
 )
 from src.display.css_html_js import custom_css
-from src.display.utils import (
-    # BENCHMARK_COLS,
-    AutoEvalColumn,
-    fields,
-)
 from src.envs import (
     API,
     EVAL_RESULTS_PATH,
@@ -87,8 +81,8 @@ def init_space(full_init: bool = True):
     # download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
     download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
     # print(subprocess.Popen('ls src'))
-    subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'])
-    subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'])
+    subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'], check=False)
+    subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'], check=False)
     # except Exception:
     #     restart_space()
 
@@ -98,7 +92,6 @@ def init_space(full_init: bool = True):
 
     leaderboard_df = original_df.copy()
 
-
     return leaderboard_df
 
 # Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
@@ -107,7 +100,7 @@ do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
 
 # Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
 # This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
-leaderboard_df = init_space(full_init=do_full_init)
+# leaderboard_df = init_space(full_init=do_full_init)
 
 demo = gr.Blocks(css=custom_css)
 with demo:
@@ -117,6 +110,7 @@ with demo:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             pass
+            """
             leaderboard = Leaderboard(
                 value=leaderboard_df,
                 datatype=[c.type for c in fields(AutoEvalColumn)],
@@ -135,6 +129,7 @@ with demo:
                 # AutoEvalColumn.license.name
                 ],
             )
+            """
 
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
@@ -151,9 +146,8 @@ with demo:
         with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name")
            def upload_file(file):
-               print(file.name)
                file_path = file.name.split('/')[-1] if '/' in file.name else file.name
-
+               logging.info("New submition: file saved to %s", file_path)
                API.upload_file(path_or_fileobj=file.name,path_in_repo='./external/'+file_path,repo_id='Vikhrmodels/openbench-eval',repo_type='dataset')
                os.environ[RESET_JUDGEMENT_ENV] = '1'
 
@@ -173,7 +167,7 @@ def update_board():
 
     os.environ[RESET_JUDGEMENT_ENV] = '0'
 
-    subprocess.run(['python','../gen/gen_judgement.py'])
+    subprocess.run(['python','../gen/gen_judgement.py'], check = False)
 
     subprocess.Popen('python3 ../gen/show_result.py --output')
 
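A note on the two rsync calls in init_space(): subprocess.run() is given an argument list, so no shell ever expands the trailing "*", and rsync receives a source path that literally ends in "*"; with check=False the resulting error is silently ignored. Below is a minimal pure-Python sketch of the intended copy. The directory names are taken from the diff; the helper name and the emulation of --ignore-existing are assumptions, not the Space's actual code.

# Sketch only: a glob + shutil equivalent of the rsync calls above.
import glob
import os
import shutil

def sync_results(src_dir: str, dst_dir: str) -> None:
    # Roughly emulates `rsync --ignore-existing src_dir/* dst_dir/`
    # for a flat directory of result files.
    os.makedirs(dst_dir, exist_ok=True)
    for src in glob.glob(os.path.join(src_dir, "*")):
        dst = os.path.join(dst_dir, os.path.basename(src))
        if os.path.isfile(src) and not os.path.exists(dst):
            shutil.copy2(src, dst)

# Called with the same paths the diff uses (EVAL_RESULTS_PATH comes from src.envs):
# sync_results(f"{EVAL_RESULTS_PATH[2:]}/external", "src/gen/data/arena-hard-v0.1/model_answer/")
# sync_results(f"{EVAL_RESULTS_PATH[2:]}/model_judgment", "src/gen/data/arena-hard-v0.1/model_judgement/")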
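The new logging.info line in upload_file assumes the logging module is imported and configured elsewhere in app.py; this hunk does not add it. Here is a sketch of the handler as it reads after the commit, with the pieces the hunk does not show filled in as labelled assumptions (logging setup, a stand-in API client, and a placeholder value for RESET_JUDGEMENT_ENV; the repo details are copied from the diff).

# Sketch, not the Space's exact code.
import logging
import os

from huggingface_hub import HfApi

logging.basicConfig(level=logging.INFO)       # assumption: app.py configures logging somewhere
API = HfApi()                                 # in app.py this comes from src.envs
RESET_JUDGEMENT_ENV = "RESET_JUDGEMENT"       # placeholder name; app.py defines the real constant

def upload_file(file):
    # os.path.basename covers both branches of the inline "split('/')" expression
    file_path = os.path.basename(file.name)
    logging.info("New submission: file saved to %s", file_path)
    API.upload_file(
        path_or_fileobj=file.name,
        path_in_repo="./external/" + file_path,
        repo_id="Vikhrmodels/openbench-eval",
        repo_type="dataset",
    )
    # Flag read back by update_board() further down in app.py
    os.environ[RESET_JUDGEMENT_ENV] = "1"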
src/scripts/update_all_request_files.py
CHANGED
@@ -1,10 +1,8 @@
 import json
 import os
-import
+import subprocess
 
-from
-
-from src.envs import API, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, DYNAMIC_INFO_REPO, EVAL_REQUESTS_PATH, H4_TOKEN
+from src.envs import EVAL_REQUESTS_PATH, H4_TOKEN
 from src.submission.check_validity import check_model_card, get_model_tags, is_model_on_hub
 
 
@@ -93,7 +91,6 @@ def update_models(file_path, models_on_the_hub):
 
 def update_dynamic_files():
     # from gen import gen_answer,gen_judgment\
-    import subprocess
     subprocess.Popen('python3 ../gen/gen_judgement.py')
 
     subprocess.Popen('python3 ../gen/show_result.py --output')
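update_dynamic_files() now relies on the module-level import subprocess added at the top of the file, but both Popen calls still pass a single string without shell=True, which on Linux is taken as the name of one executable and raises FileNotFoundError instead of launching the script; they are also fire-and-forget. A hedged sketch of a blocking variant using argument lists (the function name is mine; the script paths are copied from the diff and still assume the matching working directory):

# Sketch only: waits for each step and raises on failure.
import subprocess

def update_dynamic_files_blocking():
    subprocess.run(["python3", "../gen/gen_judgement.py"], check=True)
    subprocess.run(["python3", "../gen/show_result.py", "--output"], check=True)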
update_dynamic.py
CHANGED
@@ -1,4 +1,6 @@
 from src.scripts.update_all_request_files import update_dynamic_files
 
 if __name__ == "__main__":
+    print("Updating dynamic files")
+    # Pretty sure this will never be called
     update_dynamic_files()
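For reference, update_dynamic.py after this commit reads in full (reconstructed from the hunk above):

from src.scripts.update_all_request_files import update_dynamic_files

if __name__ == "__main__":
    print("Updating dynamic files")
    # Pretty sure this will never be called
    update_dynamic_files()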