File size: 1,575 Bytes
0adb6ea
 
 
e5ad674
0adb6ea
 
 
 
 
 
 
 
 
 
 
 
 
aa9be2a
0adb6ea
e5ad674
0adb6ea
 
e5ad674
0adb6ea
 
e5ad674
0adb6ea
 
 
 
 
 
 
 
 
 
e5ad674
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr # type: ignore
import pandas as pd
from sotopia_space.constants import MODEL_OPTIONS
from sotopia_space.utils import post_processing

# Module-level settings.
# NOTE(review): "LP" is not expanded anywhere in this file — confirm its
# meaning with whatever code consumes these two values.
LP_MODE = "v2"
DEFAULT_LP = 0.5

# Placeholders that start out empty; nothing visible in this file fills them.
original_df = None
ablation_df = None
LP_original_dfs = {}

def benchmark_table():
    """Build the "Vs GPT-3.5" leaderboard tab in the current Gradio layout.

    Creates, in order:

    1. A Markdown banner with version text and the number of entries in
       ``MODEL_OPTIONS``.
    2. A tab containing a short heading and a read-only Dataframe.

    The table data is read from ``data_dir/models_vs_gpt35.jsonl`` (one JSON
    object per line), sorted by the ``"GOAL [0, 10]"`` column in descending
    order, passed through ``post_processing``, and finally given a 1-based
    ``"Rank"`` column as its first column.

    Returns:
        None. The Gradio components are created purely for their effect on
        the enclosing layout.

    NOTE(review): presumably meant to be called inside an open
    ``gr.Blocks``/``gr.Tabs`` context — confirm at the call site.
    """
    # No ``global`` declarations: this function only reads module-level names
    # (MODEL_OPTIONS) and never rebinds any, so they would have no effect.
    gr.Markdown(f"**Version**: sotopia (v1.01; 2024.04.22) | **# Examples**: 7200 | **# Models**: {len(MODEL_OPTIONS)} | **# Comparisons**: x", elem_classes="markdown-text")

    with gr.TabItem("Vs GPT-3.5", elem_id="od-benchmark-tab-table-ablation", id=0, elem_classes="subtab"):
        table_df = pd.read_json('data_dir/models_vs_gpt35.jsonl', lines=True)
        table_df = table_df.sort_values(by="GOAL [0, 10]", ascending=False)
        table_df = post_processing(table_df, None)
        # Prepend a Rank column (starting from 1) that follows the row order
        # returned by post_processing.
        table_df.insert(0, "Rank", range(1, 1 + len(table_df)))

        with gr.Row():
            with gr.Column(scale=4):
                # The heading tag is now closed explicitly; it was left open before.
                gr.Markdown("<h3>**Vs GPT3.5**: The interlocutors are compared against GPT-3.5, the baseline model.</h3>")

        # NOTE(review): only three datatypes are listed; confirm this matches
        # the number of columns in table_df (Rank + whatever post_processing
        # returns).
        column_types = ["number", "markdown", "number"]
        gr.components.Dataframe(
            value=table_df,
            datatype=column_types,
            height=1000,
            elem_id="leaderboard-table",
            interactive=False,
            visible=True,
            min_width=60,
        )