|
import streamlit as st |
|
import pandas as pd |
|
|
|
|
|
# Custom CSS injected into the page: light text on a dark background, Arial
# 16px for dataframes, and the #ff6347 accent for headers and buttons.
# NOTE(review): whether these selectors (body, .stDataFrame, .stHeader,
# div.stButton) match the DOM that Streamlit actually renders depends on the
# installed Streamlit version - confirm in the browser.
_PAGE_CSS = """
<style>
body {
color: #fff;
background-color: #333;
}
.stDataFrame {
font-family: Arial;
font-size: 16px;
}
.stHeader {
color: #ff6347;
}
div.stButton > button:first-child {
background-color: #ff6347;
color: #fff;
}
</style>
"""

# Introductory paragraph shown under the page title.
_INTRO_TEXT = (
    "This leaderboard is used to show the performance of the automation "
    "evaluation methods of LLMs submitted by the AEOLLM team on four tasks: "
    "Summary Generation (SG), Non-Factoid QA (NFQA), Dialogue Generation (DG), "
    "Text Expansion (TE)."
)

# unsafe_allow_html is passed so the raw <style> element is emitted as HTML
# instead of being escaped.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

st.title("AEOLLM leaderboard")
st.write(_INTRO_TEXT)
|
|
|
|
|
def create_data():
    """Return the hard-coded leaderboard rows as a column-oriented dict.

    The result maps each column name ("methods", "team", "acc", "tau", "s")
    to a list with one entry per row, in that key order. A new dict with new
    lists is built on every call, so callers may mutate the result freely.

    NOTE(review): "acc", "tau" and "s" are presumably accuracy, Kendall's tau
    and Spearman correlation - confirm against the task definition.
    """
    column_names = ("methods", "team", "acc", "tau", "s")
    # One tuple per leaderboard entry, ordered like column_names.
    entries = (
        ("Model A", "U1", 0.75, 0.05, 0.12),
        ("Model B", "U2", 0.64, 0.28, 0.27),
        ("Model C", "U3", 0.83, 0.16, 0.18),
    )
    # Transpose rows into columns; list() gives each column its own list.
    return {
        name: list(values)
        for name, values in zip(column_names, zip(*entries))
    }
|
|
|
# One independent DataFrame per task, each built from its own create_data()
# call; all four currently hold the same rows.
df1, df2, df3, df4 = (pd.DataFrame(create_data()) for _ in range(4))

# Page layout: two side-by-side columns, each holding a pair of task tabs.
col1, col2 = st.columns(2)

# Left column: Summary Generation and Non-Factoid QA.
tab1, tab2 = col1.tabs(["SG", "NFQA"])
tab1.header("Summary Generation")
tab1.dataframe(df1)
tab2.header("Non-Factoid QA")
tab2.dataframe(df2)

# Right column: Dialogue Generation and Text Expansion.
tab3, tab4 = col2.tabs(["DG", "TE"])
tab3.header("Dialogue Generation")
tab3.dataframe(df3)
tab4.header("Text Expansion")
tab4.dataframe(df4)
|
|