File size: 10,838 Bytes
2ae3b27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9507b3
2ae3b27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
014f3a8
9a97f7c
014f3a8
 
 
 
 
 
 
 
 
dcad14d
 
 
649782b
 
56c9bdf
 
 
 
5944dae
dcad14d
014f3a8
 
 
 
d25c469
f9507b3
d25c469
 
 
fa556f8
df9540e
 
 
 
d25c469
 
 
 
 
 
 
 
014f3a8
d25c469
 
df9540e
d25c469
2ae3b27
 
 
 
 
 
1521b6f
0ea8496
2ae3b27
 
90cb6cf
2ae3b27
 
e6d1f05
2ae3b27
 
 
e6d1f05
0ea8496
2ae3b27
 
90cb6cf
9a97f7c
 
90cb6cf
 
 
 
5944dae
 
 
 
 
 
 
 
 
 
 
90cb6cf
2ae3b27
e6d1f05
 
2ae3b27
 
 
 
72b2069
90cb6cf
2ae3b27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5944dae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ae3b27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a784078
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# Names exported by a star import of this module.
# NOTE(review): only `block` is defined in this file; 'make_clickable_model',
# 'make_clickable_user' and 'get_submissions' are not. Confirm they are
# provided by the `constants` star import, otherwise a star import of this
# module will fail on the missing names.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import json
import os
import shutil
import tempfile

import gradio as gr
import pandas as pd
from huggingface_hub import Repository

from constants import *
# Hugging Face access token read from the environment; None when the
# HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# NOTE(review): a `global` statement at module scope has no effect; it neither
# creates nor pre-declares these names. `data_component` is only bound later
# inside the `with block:` section, and `filter_component` is only ever a
# local variable of on_filter_model_size_method_change.
global data_component, filter_component


def upload_file(files):
    """Return the ``name`` attribute of each item in *files*, in order."""
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths

def _plain_model_name(cell):
    """Return the display name stored in a 'Model Name (clickable)' cell.

    Cells are either a bare name or a markdown link ``[name](url)``; the
    brackets are only stripped when the cell really is a link, so bare names
    keep their first character.
    """
    text = str(cell)
    if text.startswith('[') and '](' in text:
        return text[1:text.index('](')]
    return text


def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
):
    """Add or update one leaderboard row from an uploaded JSON result file.

    The submission repository is pulled, the results CSV (``CSV_DIR``) is
    snapshotted, the row is appended (or overwritten when the revision name
    matches an existing model), the CSV is rewritten and the repository is
    pushed back to the Hub.

    Args:
        input_file: Raw JSON content of the upload (anything ``json.loads``
            accepts), or None when nothing was uploaded. Each key of
            ``TASK_INFO`` present in the JSON must map to a sequence whose
            first element is the score; missing keys are stored as 0.
        model_name_textbox: Name used for the row when no revision name is given.
        revision_name_textbox: When non-empty, used as the row name; if it
            matches an existing row, that row is replaced instead of appended.
        model_link: Optional URL; when non-empty the name is stored as a
            markdown link ``[name](url)``.

    Returns:
        The string "Error! Empty file!" when no file was given, otherwise 0.

    Raises:
        ValueError: If the upload is not valid JSON (raised by ``json.loads``).
    """
    if input_file is None:
        return "Error! Empty file!"

    upload_data = json.loads(input_file)
    submission_repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    submission_repo.git_pull()

    # The revision name, when given, takes precedence as the row label.
    display_name = revision_name_textbox or model_name_textbox

    # Snapshot the CSV before modifying it. The destination used to be named
    # after the raw upload content (f"{input_file}"), which yields an invalid
    # or over-long path; a sanitised model name is used instead.
    # NOTE(review): the intended backup file name is not documented - confirm.
    safe_stem = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in str(display_name)
    ) or "submission"
    shutil.copyfile(CSV_DIR, os.path.join(SUBMISSION_NAME, f"{safe_stem}.csv.bak"))

    csv_data = pd.read_csv(CSV_DIR)

    # Append by default; overwrite only when a revision targets a known model.
    row_index = csv_data.shape[0]
    if revision_name_textbox:
        known_names = [
            _plain_model_name(cell)
            for cell in csv_data['Model Name (clickable)']
        ]
        if revision_name_textbox in known_names:
            row_index = known_names.index(revision_name_textbox)

    if model_link:
        row_label = '[' + display_name + '](' + model_link + ')'
    else:
        row_label = display_name  # no url

    # One score per dimension, in TASK_INFO order; absent dimensions score 0.
    new_row = [row_label]
    new_row.extend(
        upload_data[key][0] if key in upload_data else 0 for key in TASK_INFO
    )
    csv_data.loc[row_index] = new_row
    csv_data.to_csv(CSV_DIR, index=False)
    submission_repo.push_to_hub()
    return 0

def get_normalized_df(df):
    """Return a min-max scaled copy of *df*; the input is left untouched.

    Every column after the first is rescaled with the 'Min' and 'Max' bounds
    stored for that column in ``NORMALIZE_DIC``.
    """
    scaled = df.copy()
    for dim in scaled.columns[1:]:
        bounds = NORMALIZE_DIC[dim]
        low = bounds['Min']
        high = bounds['Max']
        scaled[dim] = (scaled[dim] - low) / (high - low)
    return scaled

def calculate_selected_score(df, selected_columns):
    """Return the per-row score restricted to the selected dimensions.

    The selected columns are split into quality and semantic dimensions
    (``QUALITY_LIST`` / ``SEMANTIC_LIST``). Each group's score is the row sum
    of its columns divided by the sum of the group's ``DIM_WEIGHT`` values;
    the two group scores are then combined with ``QUALITY_WEIGHT`` and
    ``SEMANTIC_WEIGHT``. When only one group is selected, that group's score
    is returned on its own instead of being mixed with an undefined 0/0
    score for the empty group.

    Args:
        df: DataFrame holding one column per dimension. The caller in this
            file passes normalized columns already multiplied by DIM_WEIGHT.
        selected_columns: Iterable of dimension names to include.

    Returns:
        A Series indexed like *df*. All values are NaN when no selected
        column belongs to either group (same as before this fix).
    """
    quality_dims = [dim for dim in selected_columns if dim in QUALITY_LIST]
    semantic_dims = [dim for dim in selected_columns if dim in SEMANTIC_LIST]

    def _group_score(dims):
        # Weighted average of one non-empty group of dimensions.
        return df[dims].sum(axis=1) / sum(DIM_WEIGHT[dim] for dim in dims)

    if not quality_dims and not semantic_dims:
        return pd.Series(float("nan"), index=df.index)
    if not quality_dims:
        return _group_score(semantic_dims)
    if not semantic_dims:
        return _group_score(quality_dims)

    quality_score = _group_score(quality_dims)
    semantic_score = _group_score(semantic_dims)
    return (
        quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT
    ) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)

def get_final_score(df, selected_columns):
    """Add the aggregate score columns to *df* in place and return it.

    Scores are computed on a normalized copy whose dimension columns are
    multiplied by ``DIM_WEIGHT``. 'Overall Score', 'Semantic Score',
    'Quality Score' and 'Selected Score' are overwritten when already
    present, otherwise inserted at positions 1, 2, 3 and 1 respectively.
    """
    normalize_df = get_normalized_df(df)
    for dim in normalize_df.drop(columns='Model Name (clickable)').columns:
        normalize_df[dim] = normalize_df[dim] * DIM_WEIGHT[dim]

    quality_total = sum(DIM_WEIGHT[dim] for dim in QUALITY_LIST)
    semantic_total = sum(DIM_WEIGHT[dim] for dim in SEMANTIC_LIST)
    quality_score = normalize_df[QUALITY_LIST].sum(axis=1) / quality_total
    semantic_score = normalize_df[SEMANTIC_LIST].sum(axis=1) / semantic_total
    weighted_sum = quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT
    final_score = weighted_sum / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)

    def _set_or_insert(position, label, values):
        # Overwrite an existing column, otherwise insert it at `position`.
        if label in df.columns:
            df[label] = values
        else:
            df.insert(position, label, values)

    _set_or_insert(1, 'Overall Score', final_score)
    _set_or_insert(2, 'Semantic Score', semantic_score)
    _set_or_insert(3, 'Quality Score', quality_score)

    selected_score = calculate_selected_score(normalize_df, selected_columns)
    _set_or_insert(1, 'Selected Score', selected_score)
    return df

def get_baseline_df():
    """Build the initial leaderboard table for the current checkbox selection.

    Pulls the submission repository, scores the CSV at ``CSV_DIR`` using
    ``checkbox_group.value``, sorts by 'Selected Score' (descending), keeps
    the ``MODEL_INFO`` columns plus the selected ones, and formats the scores
    as percentage strings.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    scored = get_final_score(pd.read_csv(CSV_DIR), checkbox_group.value)
    ranked = scored.sort_values(by="Selected Score", ascending=False)
    visible = ranked[MODEL_INFO + checkbox_group.value]
    return convert_scores_to_percentage(visible)

def get_all_df(selected_columns):
    """Return the full scored table, sorted by 'Selected Score' descending.

    Pulls the submission repository first so the CSV at ``CSV_DIR`` is up to
    date, then adds the score columns for *selected_columns*.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    scored = get_final_score(pd.read_csv(CSV_DIR), selected_columns)
    return scored.sort_values(by="Selected Score", ascending=False)

def convert_scores_to_percentage(df):
    """Turn every score column of *df* into a percentage string, in place.

    Each column after the first is multiplied by 100, rounded to two
    decimals and suffixed with '%'. The same DataFrame object is returned.
    """
    # The first column is assumed to hold the name, so it is skipped.
    for score_col in df.columns[1:]:
        as_percent = (df[score_col] * 100).round(2)
        df[score_col] = as_percent.astype(str) + '%'
    return df

def choose_all_quailty():
    """Select every quality dimension and rebuild the table for that selection.

    Returns a pair for the ``[checkbox_group, data_component]`` outputs: an
    update that sets the checkbox *value* (previously the available choices
    were replaced instead), and the refreshed Dataframe (previously the bare
    list of names was sent to the table).
    """
    return gr.update(value=QUALITY_LIST), on_filter_model_size_method_change(QUALITY_LIST)

def choose_all_semantic():
    """Select every semantic dimension and rebuild the table for that selection.

    Returns a pair for the ``[checkbox_group, data_component]`` outputs: an
    update that sets the checkbox *value* (previously the available choices
    were replaced instead), and the refreshed Dataframe (previously the bare
    list of names was sent to the table).
    """
    return gr.update(value=SEMANTIC_LIST), on_filter_model_size_method_change(SEMANTIC_LIST)

def disable_all():
    """Clear the dimension selection and rebuild the table accordingly.

    Returns a pair for the ``[checkbox_group, data_component]`` outputs: an
    update that empties the checkbox *value* (previously the choices
    themselves were emptied, which removed the checkboxes), and the refreshed
    Dataframe (previously an empty list was sent to the table).
    """
    return gr.update(value=[]), on_filter_model_size_method_change([])

def enable_all():
    """Select every dimension in TASK_INFO and rebuild the table accordingly.

    Returns a pair for the ``[checkbox_group, data_component]`` outputs: an
    update that sets the checkbox *value* (previously only the available
    choices were reset), and the refreshed Dataframe (previously the bare
    list of names was sent to the table).
    """
    return gr.update(value=TASK_INFO), on_filter_model_size_method_change(TASK_INFO)

def on_filter_model_size_method_change(selected_columns):
    """Rebuild the leaderboard Dataframe for the given dimension selection.

    The table is re-scored, reduced to the ``MODEL_INFO`` columns plus the
    selected dimensions (kept in ``TASK_INFO`` order), sorted by
    'Selected Score' descending and formatted as percentages. A new,
    non-interactive Dataframe component carrying that table is returned.
    """
    table = get_all_df(selected_columns)
    print(table)

    # Keep the selected dimensions in their canonical TASK_INFO order.
    ordered_dims = [dim for dim in TASK_INFO if dim in selected_columns]
    headers = MODEL_INFO + ordered_dims

    table = table[headers].sort_values(by="Selected Score", ascending=False)
    table = convert_scores_to_percentage(table)

    # Look up each visible column's datatype by its position in COLUMN_NAMES.
    datatypes = []
    for header in headers:
        datatypes.append(DATA_TITILE_TYPE[COLUMN_NAMES.index(header)])

    return gr.components.Dataframe(
        value=table,
        headers=headers,
        type="pandas",
        datatype=datatypes,
        interactive=False,
        visible=True,
    )

# Top-level Gradio application: a leaderboard tab, an about tab and a
# submission tab, followed by a refresh button.
block = gr.Blocks()


with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # tab 1: leaderboard table
        with gr.TabItem("📊 VBench", elem_id="vbench-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )

            gr.Markdown(
                TABLE_INTRODUCTION
            )
            with gr.Row():
                with gr.Column():
                    choosen_q = gr.Button("Select ALL Quality")
                    choosen_s = gr.Button("Select ALL Semantic")
                    enable_b = gr.Button("Select All")
                    disable_b = gr.Button("Deselect All")
                # selection for column part:
                checkbox_group = gr.CheckboxGroup(
                    choices=TASK_INFO,
                    value=DEFAULT_INFO,
                    label="Evaluation Dimension",
                    interactive=True,
                )

            data_component = gr.components.Dataframe(
                value=get_baseline_df,
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITILE_TYPE,
                interactive=False,
                visible=True,
                )

            # The button handlers are registered only now: they reference
            # checkbox_group and data_component, which must already exist.
            # Registering them next to the buttons (before the components
            # were created) raised NameError while the module was loading.
            choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group, data_component])
            choosen_s.click(choose_all_semantic, inputs=None, outputs=[checkbox_group, data_component])
            enable_b.click(enable_all, inputs=None, outputs=[checkbox_group, data_component])
            disable_b.click(disable_all, inputs=None, outputs=[checkbox_group, data_component])

            checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[ checkbox_group], outputs=data_component)

        # tab 2: about
        with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=2):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        # tab 3: submission form
        with gr.TabItem("🚀 Submit here! ", elem_id="mvbench-tab-table", id=3):
            gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LaVie"
                        )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LaVie"
                    )

                with gr.Column():
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )


            with gr.Column():

                input_file = gr.components.File(label = "Click to Upload a json File", file_count="single", type='binary')
                submit_button = gr.Button("Submit Eval")

                submission_result = gr.Markdown()
                # NOTE(review): no `outputs` are wired here, so whatever
                # add_new_eval returns is not shown in `submission_result`.
                submit_button.click(
                    add_new_eval,
                    inputs = [
                        input_file,
                        model_name_textbox,
                        revision_name_textbox,
                        model_link,
                    ],
                )


    def refresh_data():
        """Return a freshly computed baseline table (not wired to any event here)."""
        value1 = get_baseline_df()
        return value1

    with gr.Row():
        data_run = gr.Button("Refresh")
        # Refresh re-runs the same handler as a checkbox change.
        data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)


block.launch()