File size: 2,203 Bytes
73a9525
 
 
1edab07
 
 
 
 
4c24185
ca872a1
 
 
 
 
 
 
 
38d9f8d
1edab07
4c24185
1edab07
ca872a1
 
73a9525
4c24185
ca872a1
ed56ab7
1edab07
 
ed56ab7
4c24185
1edab07
ed56ab7
4c24185
 
 
 
 
 
 
 
1edab07
4c24185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import streamlit as st
import pandas as pd

# CSS styles
# Custom stylesheet injected into the page: dark body colours, container
# padding / max-width overrides, dataframe font and width, and the h1 colour.
# NOTE(review): the first in-CSS comment mentions a 90% width, but that rule
# only sets padding. The `.css-*` selectors look like auto-generated class
# names — confirm they still match the installed Streamlit version.
_PAGE_STYLE = """
<style>
body {
    color: #fff;
    background-color: #111;
}
/* 调整全局容器宽度为屏幕的 90%,自适应屏幕 */
.css-1d391kg {
    padding: 1rem 1rem;  /* 调整内边距 */
}
/* 设置主体内容最大宽度为100%,自适应屏幕 */
.css-1lcbmhc {
    max-width: 100%;
}
.stDataFrame {
    font-family: Helvetica;
    font-size: 16px;
    width: 100%;
    min-width: 100%;
}
h1 {
    color: #ffdf92;
}
</style>
"""

# Raw HTML must be allowed here, otherwise the <style> element is not emitted.
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# Page title
st.title('AEOLLM leaderboard')

# Introductory description of the leaderboard and the four tasks it covers.
# The text is plain Markdown (bold markers and a bullet list) with no embedded
# HTML, so raw-HTML rendering is left at its safe default rather than enabled.
st.markdown("""
This leaderboard is used to show the performance of the **automation evaluation methods of LLMs** submitted by the **AEOLLM team** on four tasks:
- Summary Generation (SG)
- Non-Factoid QA (NFQA)
- Dialogue Generation (DG)
- Text Expansion (TE).
""")
# Build the sample leaderboard data.
# Every task currently shows the same placeholder rows; each task still gets
# its own dict and its own DataFrame so they can be edited independently.
def _sample_scores():
    """Return a freshly built dict of the placeholder leaderboard columns."""
    # NOTE(review): "acc", "tau" and "s" are presumably evaluation metrics
    # (e.g. accuracy and rank correlations) — confirm with the task owners.
    return {
        "methods": ["Model A", "Model B", "Model C"],
        "team": ["U1", "U2", "U3"],
        "acc": [0.75, 0.64, 0.83],
        "tau": [0.05, 0.28, 0.16],
        "s": [0.12, 0.27, 0.18],
    }


# Summary Generation
SG = _sample_scores()
df1 = pd.DataFrame(SG)

# Non-Factoid QA
NFQA = _sample_scores()
df2 = pd.DataFrame(NFQA)

# Dialogue Generation
DG = _sample_scores()
df3 = pd.DataFrame(DG)

# Text Expansion
TE = _sample_scores()
df4 = pd.DataFrame(TE)

# Create one tab per task
tab1, tab2, tab3, tab4 = st.tabs(["SG", "NFQA", "DG", "TE"])

# Pair each tab with its heading and results table, then fill the tabs in
# order: a header first, followed by the task's dataframe.
_tab_contents = (
    (tab1, "Summary Generation", df1),
    (tab2, "Non-Factoid QA", df2),
    (tab3, "Dialogue Generation", df3),
    (tab4, "Text Expansion", df4),
)
for _tab, _heading, _frame in _tab_contents:
    with _tab:
        st.header(_heading)
        st.dataframe(_frame)