陈俊杰 committed on
Commit
4c24185
1 Parent(s): 1edab07
Files changed (1) hide show
  1. app.py +71 -42
app.py CHANGED
@@ -6,57 +6,86 @@ st.markdown("""
6
  <style>
7
  body {
8
  color: #fff;
9
- background-color: #333;
 
10
  }
11
  .stDataFrame {
12
- font-family: Arial;
13
  font-size: 16px;
14
  }
15
- .stHeader {
16
  color: #ff6347;
17
  }
18
- div.stButton > button:first-child {
19
- background-color: #ff6347;
20
- color: #fff;
21
- }
22
  </style>
23
  """, unsafe_allow_html=True)
24
 
25
- # 标题和描述
26
  st.title('AEOLLM leaderboard')
27
- st.write("This leaderboard is used to show the performance of the automation evaluation methods of LLMs submitted by the AEOLLM team on four tasks: Summary Generation (SG), Non-Factoid QA (NFQA), Dialogue Generation (DG), Text Expansion (TE).")
28
 
 
 
 
 
 
 
 
 
29
  # 创建示例数据
30
- def create_data():
31
- return {
32
- "methods": ["Model A", "Model B", "Model C"],
33
- "team": ["U1", "U2", "U3"],
34
- "acc": [0.75, 0.64, 0.83],
35
- "tau": [0.05, 0.28, 0.16],
36
- "s": [0.12, 0.27, 0.18]
37
- }
38
-
39
- df1 = pd.DataFrame(create_data())
40
- df2 = pd.DataFrame(create_data())
41
- df3 = pd.DataFrame(create_data())
42
- df4 = pd.DataFrame(create_data())
43
-
44
- # 使用列功能进行布局
45
- col1, col2 = st.columns(2)
46
- with col1:
47
- tab1, tab2 = st.tabs(["SG", "NFQA"])
48
- with tab1:
49
- st.header("Summary Generation")
50
- st.dataframe(df1)
51
- with tab2:
52
- st.header("Non-Factoid QA")
53
- st.dataframe(df2)
54
-
55
- with col2:
56
- tab3, tab4 = st.tabs(["DG", "TE"])
57
- with tab3:
58
- st.header("Dialogue Generation")
59
- st.dataframe(df3)
60
- with tab4:
61
- st.header("Text Expansion")
62
- st.dataframe(df4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  <style>
7
  body {
8
  color: #fff;
9
+ background-color: #111;
10
+ max-width: 90%;
11
  }
12
  .stDataFrame {
13
+ font-family: Helvetica;
14
  font-size: 16px;
15
  }
16
+ h1 {
17
  color: #ff6347;
18
  }
 
 
 
 
19
  </style>
20
  """, unsafe_allow_html=True)
21
 
22
+ # 标题
23
  st.title('AEOLLM leaderboard')
 
24
 
25
+ # 描述
26
+ st.markdown("""
27
+ This leaderboard is used to show the performance of the **automation evaluation methods of LLMs** submitted by the **AEOLLM team** on four tasks:
28
+ - Summary Generation (SG)
29
+ - Non-Factoid QA (NFQA)
30
+ - Dialogue Generation (DG)
31
+ - Text Expansion (TE).
32
+ """, unsafe_allow_html=True)
33
  # 创建示例数据
34
+ SG = {
35
+ "methods": ["Model A", "Model B", "Model C"],
36
+ "team": ["U1", "U2", "U3"],
37
+ "acc": [0.75, 0.64, 0.83],
38
+ "tau": [0.05, 0.28, 0.16],
39
+ "s": [0.12, 0.27, 0.18]
40
+ }
41
+ df1 = pd.DataFrame(SG)
42
+
43
+ NFQA = {
44
+ "methods": ["Model A", "Model B", "Model C"],
45
+ "team": ["U1", "U2", "U3"],
46
+ "acc": [0.75, 0.64, 0.83],
47
+ "tau": [0.05, 0.28, 0.16],
48
+ "s": [0.12, 0.27, 0.18]
49
+ }
50
+ df2 = pd.DataFrame(NFQA)
51
+
52
+ DG = {
53
+ "methods": ["Model A", "Model B", "Model C"],
54
+ "team": ["U1", "U2", "U3"],
55
+ "acc": [0.75, 0.64, 0.83],
56
+ "tau": [0.05, 0.28, 0.16],
57
+ "s": [0.12, 0.27, 0.18]
58
+ }
59
+ df3 = pd.DataFrame(DG)
60
+
61
+ TE = {
62
+ "methods": ["Model A", "Model B", "Model C"],
63
+ "team": ["U1", "U2", "U3"],
64
+ "acc": [0.75, 0.64, 0.83],
65
+ "tau": [0.05, 0.28, 0.16],
66
+ "s": [0.12, 0.27, 0.18]
67
+ }
68
+ df4 = pd.DataFrame(TE)
69
+
70
+ # 创建标签页
71
+ tab1, tab2, tab3, tab4 = st.tabs(["SG", "NFQA", "DG", "TE"])
72
+
73
+ # 在标签页 1 中添加内容
74
+ with tab1:
75
+ st.header("Summary Generation")
76
+ st.dataframe(df1)
77
+
78
+ # 在标签页 2 中添加内容
79
+ with tab2:
80
+ st.header("Non-Factoid QA")
81
+ st.dataframe(df2)
82
+
83
+ # 在标签页 3 中添加内容
84
+ with tab3:
85
+ st.header("Dialogue Generation")
86
+ st.dataframe(df3)
87
+
88
+ # 在标签页 4 中添加内容
89
+ with tab4:
90
+ st.header("Text Expansion")
91
+ st.dataframe(df4)