陈俊杰 committed on
Commit
eaa1d85
1 Parent(s): 4d2570c

cjj: updateRank

Browse files
Files changed (1) hide show
  1. app.py +40 -40
app.py CHANGED
@@ -23,69 +23,69 @@ st.title('🏆AEOLLM Leaderboard')
23
  # 描述
24
  st.markdown("""
25
  This leaderboard is used to show the performance of the **automatic evaluation methods of LLMs** submitted by the **AEOLLM team** on four tasks:
 
 
26
  - Summary Generation (SG)
27
  - Non-Factoid QA (NFQA)
28
- - Dialogue Generation (DG)
29
- - Text Expansion (TE).
30
 
31
  Details of AEOLLM can be found at the link: [https://aeollm.github.io/](https://aeollm.github.io/)
32
  """, unsafe_allow_html=True)
33
  # 创建示例数据
34
- SG = {
35
- "methods": ["Model A", "Model B", "Model C"],
36
- "team": ["U1", "U2", "U3"],
37
- "acc": [0.75, 0.64, 0.83],
38
- "tau": [0.05, 0.28, 0.16],
39
- "s": [0.12, 0.27, 0.18],
 
40
  }
41
- df1 = pd.DataFrame(SG)
42
 
43
- NFQA = {
44
- "methods": ["Model A", "Model B", "Model C"],
45
- "team": ["U1", "U2", "U3"],
46
- "acc": [0.75, 0.64, 0.83],
47
- "tau": [0.05, 0.28, 0.16],
48
- "s": [0.12, 0.27, 0.18]
49
  }
50
- df2 = pd.DataFrame(NFQA)
51
 
52
- DG = {
53
- "methods": ["Model A", "Model B", "Model C"],
54
- "team": ["U1", "U2", "U3"],
55
- "acc": [0.75, 0.64, 0.83],
56
- "tau": [0.05, 0.28, 0.16],
57
- "s": [0.12, 0.27, 0.18]
58
  }
59
- df3 = pd.DataFrame(DG)
60
 
61
- TE = {
62
- "methods": ["Model A", "Model B", "Model C"],
63
- "team": ["U1", "U2", "U3"],
64
- "acc": [0.75, 0.64, 0.83],
65
- "tau": [0.05, 0.28, 0.16],
66
- "s": [0.12, 0.27, 0.18]
67
  }
68
- df4 = pd.DataFrame(TE)
69
 
70
  # 创建标签页
71
- tab1, tab2, tab3, tab4 = st.tabs(["SG", "NFQA", "DG", "TE"])
72
 
73
- # 在标签页 1 中添加内容
74
  with tab1:
75
- st.header("Summary Generation")
76
  st.dataframe(df1, use_container_width=True)
77
 
78
- # 在标签页 2 中添加内容
79
  with tab2:
80
- st.header("Non-Factoid QA")
81
  st.dataframe(df2, use_container_width=True)
82
 
83
- # 在标签页 3 中添加内容
84
  with tab3:
85
- st.header("Dialogue Generation")
86
  st.dataframe(df3, use_container_width=True)
87
 
88
- # 在标签页 4 中添加内容
89
  with tab4:
90
- st.header("Text Expansion")
91
- st.dataframe(df4, use_container_width=True, )
 
23
  # 描述
24
  st.markdown("""
25
  This leaderboard is used to show the performance of the **automatic evaluation methods of LLMs** submitted by the **AEOLLM team** on four tasks:
26
+ - Dialogue Generation (DG)
27
+ - Text Expansion (TE)
28
  - Summary Generation (SG)
29
  - Non-Factoid QA (NFQA)
 
 
30
 
31
  Details of AEOLLM can be found at the link: [https://aeollm.github.io/](https://aeollm.github.io/)
32
  """, unsafe_allow_html=True)
33
  # 创建示例数据
34
+
35
+ DG = {
36
+ "methods": ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
37
+ "team": ["baseline", "baseline", "baseline", "baseline"],
38
+ "accuracy": [0.5806, 0.5483, 0.6001, 0.6472],
39
+ "kendall's tau": [0.3243, 0.1739, 0.3042, 0.4167],
40
+ "spearman": [0.3505, 0.1857, 0.3264, 0.4512]
41
  }
42
+ df1 = pd.DataFrame(DG)
43
 
44
+ TE = {
45
+ "methods": ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
46
+ "team": ["baseline", "baseline", "baseline", "baseline"],
47
+ "accuracy": [0.5107, 0.5050, 0.5461, 0.5581],
48
+ "kendall's tau": [0.1281, 0.0635, 0.2716, 0.3864],
49
+ "spearman": [0.1352, 0.0667, 0.2867, 0.4157]
50
  }
51
+ df2 = pd.DataFrame(TE)
52
 
53
+ SG = {
54
+ "methods": ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
55
+ "team": ["baseline", "baseline", "baseline", "baseline"],
56
+ "accuracy": [0.6504, 0.6014, 0.7162, 0.7441],
57
+ "kendall's tau": [0.3957, 0.2688, 0.5092, 0.5001],
58
+ "spearman": [0.4188, 0.2817, 0.5403, 0.5405],
59
  }
60
+ df3 = pd.DataFrame(SG)
61
 
62
+ NFQA = {
63
+ "methods": ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
64
+ "team": ["baseline", "baseline", "baseline", "baseline"],
65
+ "accuracy": [0.5935, 0.5817, 0.7000, 0.7203],
66
+ "kendall's tau": [0.2332, 0.2389, 0.4440, 0.4235],
67
+ "spearman": [0.2443, 0.2492, 0.4630, 0.4511]
68
  }
69
+ df4 = pd.DataFrame(NFQA)
70
 
71
  # 创建标签页
72
+ tab1, tab2, tab3, tab4 = st.tabs(["DG", "TE", "SG", "NFQA"])
73
 
74
+ # 在标签页 1 中添加内容
75
  with tab1:
76
+ st.header("Task: Dialogue Generation; Dataset: DialyDialog")
77
  st.dataframe(df1, use_container_width=True)
78
 
79
+ # 在标签页 2 中添加内容
80
  with tab2:
81
+ st.header("Task: Text Expansion; Dataset: WritingPrompts")
82
  st.dataframe(df2, use_container_width=True)
83
 
 
84
  with tab3:
85
+ st.header("Task: Summary Generation; Dataset: Xsum")
86
  st.dataframe(df3, use_container_width=True)
87
 
88
+ # 在标签页 4 中添加内容
89
  with tab4:
90
+ st.header("Task: Non-Factoid QA; Dataset: NF_CATS")
91
+ st.dataframe(df4, use_container_width=True)