|
import datetime
import json

import pandas as pd
import streamlit as st
from huggingface_hub import HfApi
|
|
|
|
|
# Maximum number of submissions a single user may make per calendar day.
MAX_SUBMISSIONS_PER_DAY = 3

# In-memory ledger of submissions, keyed by user id. Each value is a dict of
# the form {"date": <datetime.date>, "count": <int>}.
# NOTE(review): Streamlit re-executes the script on user interaction, so this
# module-level dict is presumably recreated on each run — confirm whether the
# limit is meant to persist (e.g. via st.session_state or external storage).
submissions_log = {}
|
|
|
|
|
def get_user_id():
    """Return the Hugging Face account name tied to the configured API token.

    The token is read from ``st.secrets["hf_api_token"]`` and passed to
    ``HfApi.whoami``; the ``"name"`` field of the response is returned.

    NOTE(review): the token comes from the app's secrets, so this presumably
    identifies the token's owner rather than the person viewing the page —
    confirm that this is the identity the per-day limit should be keyed on.
    """
    api = HfApi()
    user_info = api.whoami(token=st.secrets["hf_api_token"])
    return user_info["name"]
|
|
|
|
|
def check_submission_limit(user_id):
    """Report whether ``user_id`` may still submit today.

    Looks up the user's record in ``submissions_log``. If there is no record,
    or the record is dated on a different day, a fresh record for today with a
    count of zero is stored and ``True`` is returned. Otherwise the result is
    whether today's count is still below ``MAX_SUBMISSIONS_PER_DAY``.

    Args:
        user_id: Key identifying the submitting user in ``submissions_log``.

    Returns:
        ``True`` if another submission is allowed today, ``False`` otherwise.
    """
    today = datetime.date.today()
    entry = submissions_log.get(user_id)

    # First submission ever, or the first one on a new day: start counting
    # from zero again.
    if entry is None or entry["date"] != today:
        submissions_log[user_id] = {"date": today, "count": 0}
        return True

    return entry["count"] < MAX_SUBMISSIONS_PER_DAY
|
|
|
|
|
def update_submission_count(user_id):
    """Record one more submission for ``user_id`` on today's date.

    If the user has no record in ``submissions_log``, or the stored record is
    dated on a different day, a fresh record for today is created first, so
    the increment always lands on today's count instead of raising
    ``KeyError`` or bumping a stale record.

    Args:
        user_id: Key identifying the submitting user in ``submissions_log``.
    """
    today = datetime.date.today()
    entry = submissions_log.get(user_id)

    if entry is None or entry["date"] != today:
        entry = submissions_log[user_id] = {"date": today, "count": 0}

    entry["count"] += 1
|
|
|
|
|
# Inject page-wide CSS overrides: a larger <h1>, Helvetica for elements with
# the `.stDataFrame` class, and a minimum width for `.dataframe` table cells.
# `unsafe_allow_html=True` is passed so the <style> element is emitted as HTML.
# (The CSS comment inside the string reads "title font size".)
st.markdown("""
<style>
h1 {
    font-size: 2.5em; /* 标题字体大小 */
}
.stDataFrame {
    font-family: Helvetica;
}
.dataframe th, .dataframe td {
    width: auto;
    min-width: 500px;
}
</style>
""", unsafe_allow_html=True)
|
|
|
|
|
# Page title, followed by a short introduction listing the four tasks and
# linking to the project page.
st.title("🏆AEOLLM Leaderboard")

st.markdown("""
This leaderboard is used to show the performance of the **automatic evaluation methods of LLMs** submitted by the **AEOLLM team** on four tasks:
- Summary Generation (SG)
- Non-Factoid QA (NFQA)
- Dialogue Generation (DG)
- Text Expansion (TE).

Details of AEOLLM can be found at the link: [https://cjj826.github.io/AEOLLM/](https://cjj826.github.io/AEOLLM/)

Submit your result here (.json):
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sample_results():
    """Return a fresh dict holding the sample leaderboard columns.

    Every call builds new lists, so the per-task dicts created from it do not
    share mutable state with one another.

    Columns: ``methods`` and ``team`` are labels; ``acc``, ``tau`` and ``s``
    are scores (presumably accuracy, Kendall's tau and Spearman correlation —
    TODO confirm against the task definition).
    """
    return {
        "methods": ["Model A", "Model B", "Model C"],
        "team": ["U1", "U2", "U3"],
        "acc": [0.75, 0.64, 0.83],
        "tau": [0.05, 0.28, 0.16],
        "s": [0.12, 0.27, 0.18],
    }


# One column dict and one DataFrame per task. All four tasks currently hold
# the same sample rows; replace each dict with that task's real results.
SG = _sample_results()  # Summary Generation
df1 = pd.DataFrame(SG)

NFQA = _sample_results()  # Non-Factoid QA
df2 = pd.DataFrame(NFQA)

DG = _sample_results()  # Dialogue Generation
df3 = pd.DataFrame(DG)

TE = _sample_results()  # Text Expansion
df4 = pd.DataFrame(TE)
|
|
|
|
|
# One tab per task; each tab shows a header and that task's results table
# stretched to the container width.
tab1, tab2, tab3, tab4 = st.tabs(["SG", "NFQA", "DG", "TE"])

for tab, heading, frame in (
    (tab1, "Summary Generation", df1),
    (tab2, "Non-Factoid QA", df2),
    (tab3, "Dialogue Generation", df3),
    (tab4, "Text Expansion", df4),
):
    with tab:
        st.header(heading)
        st.dataframe(frame, use_container_width=True)
|
|