# Page header captured together with the file (not part of the program):
# nyanko7's picture · Squashed · f060dfc verified
import pandas as pd
import streamlit as st
import time
from collections import defaultdict
from streamlit_image_select import image_select
import requests
import os
# Use the wide page layout for the whole app.
st.set_page_config(layout="wide")

# Markdown shown in the sidebar: what the app is, how to use it, where the data comes from.
description = """
# Anime Leaderboard
Text to Image (Anime/Illustration) Generation Leaderboard.
This leaderboard is just for fun and does not reflect the actual performance of the models.
## How to Use
- Select the image that best reflects the given prompt.
- Your selections contribute to the global leaderboard.
- View your personal leaderboard after making at least 30 selections.
## Data
- Data Source: [nyanko7/image-samples](https://huggingface.co/datasets/nyanko7/image-samples)
- Calling for submissions: [open issue](https://huggingface.co/spaces/nyanko7/text-to-anime-arena/discussions/new) or contact me to submit your model
- Warning: Some images may contain NSFW content.
"""

# Seed the per-session state keys the rest of the script reads, leaving any
# value that already exists untouched.
_SESSION_DEFAULTS = (
    ('selections', []),
    ('selection_count', 0),
    ('last_pair', None),
    ('user_id', None),
)
for _key, _default in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _default

st.sidebar.markdown(description)

# Base URL of the backend, taken from the W_SERVER environment variable
# (None when the variable is not set).
SERVER_URL = os.environ.get("W_SERVER")  # Replace with your actual server URL
def get_next_pair():
    """Fetch the next image pair to compare from the backend.

    Returns:
        The decoded JSON body of ``GET {SERVER_URL}/next_pair`` when the
        server answers with HTTP 200, otherwise ``None``. On any failure an
        error is shown in the UI and the cause is printed to the server log.
    """
    try:
        # Without a timeout a stalled backend would block this script run
        # indefinitely.
        response = requests.get(f"{SERVER_URL}/next_pair", timeout=10)
        if response.status_code == 200:
            return response.json()
        print(response)
    except Exception as e:
        # UI boundary: network errors, bad URLs and undecodable JSON are all
        # reported the same way instead of crashing the page.
        print(e)
    st.error("Failed to fetch next pair from server")
    return None


# Load the first pair once per session; later pairs are fetched on demand.
if "pair" not in st.session_state:
    st.session_state["pair"] = get_next_pair()
def submit_selection(selection_result):
    """Send one selection to the backend and remember the returned user id.

    Args:
        selection_result: JSON-serialisable dict describing the selection;
            it is posted as the request body to ``{SERVER_URL}/submit_selection``.

    The current ``user_id`` from the session (if any) is sent in the
    ``User-ID`` header. When the server answers with HTTP 200 and its JSON
    body contains ``user_id``, that value is stored back into the session.
    Failures never raise: an error is shown in the UI and the cause is
    printed to the server log.
    """
    headers = {}
    user_id = st.session_state['user_id']
    if user_id:
        headers['User-ID'] = user_id
    try:
        response = requests.post(
            f"{SERVER_URL}/submit_selection",
            json=selection_result,
            headers=headers,
            # Without a timeout a stalled backend would block the script run.
            timeout=10,
        )
        if response.status_code == 200:
            response_data = response.json()
            if 'user_id' in response_data:
                st.session_state['user_id'] = response_data['user_id']
            return
        print(response)
    except Exception as e:
        # Log the cause: the UI message alone carries no detail, and the
        # other fetch helper in this file logs its failures the same way.
        print(e)
    st.error("Failed to submit selection to server")
def get_leaderboard_data():
    """Fetch the global leaderboard from the backend.

    Returns:
        The decoded JSON body of ``GET {SERVER_URL}/leaderboard`` when the
        server answers with HTTP 200, otherwise ``None``. On any failure an
        error is shown in the UI and the cause is printed to the server log.
    """
    try:
        # Without a timeout a stalled backend would block this script run
        # indefinitely.
        response = requests.get(f"{SERVER_URL}/leaderboard", timeout=10)
        if response.status_code == 200:
            return response.json()
        print(response)
    except Exception as e:
        # UI boundary: report every failure the same way, but keep the cause
        # in the log so it can be diagnosed.
        print(e)
    st.error("Failed to fetch leaderboard data from server")
    return None
import io
from PIL import Image
def open_image_from_url(image_url):
    """Download an image and open it with Pillow.

    Args:
        image_url: URL of the image to download.

    Returns:
        The object returned by ``Image.open`` for an in-memory copy of the
        downloaded bytes.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status (raised by ``raise_for_status``).
        Pillow may additionally raise if the bytes are not an image it can
        identify.
    """
    # The whole body is read through ``.content`` anyway, so streaming brings
    # nothing; the timeout keeps a stalled image host from hanging the page.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))
def _advance_to_next_pair():
    """Load a fresh pair into the session and redraw only the arena fragment."""
    st.session_state["pair"] = get_next_pair()
    st.rerun(scope="fragment")


@st.fragment
def arena():
    """Render the comparison page: show a prompt with two images and record the pick.

    The current pair is read from ``st.session_state["pair"]``. Picking an
    image appends the result to the session's ``selections``, bumps
    ``selection_count``, submits the result to the backend and moves on to
    the next pair; "Skip" moves on without recording anything.

    If no pair is available (the backend fetch failed) the fetch is retried
    and, when it fails again, a "Retry" button is shown instead of crashing.
    If the images cannot be downloaded, an error and a "Skip" button are shown.
    """
    pair = st.session_state.get("pair")
    if pair is None:
        # get_next_pair() returns None on failure; try again before giving up.
        pair = get_next_pair()
        st.session_state["pair"] = pair
    if pair is None:
        # get_next_pair() has already displayed the error message.
        if st.button("Retry"):
            st.rerun(scope="fragment")
        return

    image_url1, model_a = pair["image1"], pair["model_a"]
    image_url2, model_b = pair["image2"], pair["model_b"]
    prompt = pair["prompt"]

    st.markdown("**Which image best reflects this prompt?**")
    st.info(
        f"\nPrompt: {prompt}\n",
        icon="⏳",
    )

    # Read the image data from the URLs.
    try:
        images = [
            open_image_from_url(image_url1),
            open_image_from_url(image_url2),
        ]
    except Exception as e:
        # A broken image URL must not leave the user stuck on this pair
        # without any control to move on.
        print(e)
        st.error("Failed to load the images for this pair")
        if st.button("Skip"):
            _advance_to_next_pair()
        return
    models = [model_a, model_b]

    # index=-1 means "nothing selected yet"; the widget returns the position
    # of the chosen image.
    idx = image_select(
        label="Select the image you prefer",
        images=images,
        index=-1,
        center=True,
        height=700,
        return_value="index",
    )

    if st.button("Skip"):
        _advance_to_next_pair()

    # Feedback about the previous pick (which model the chosen image came from).
    last_state = st.session_state.get("last_state")
    if last_state is not None:
        st.markdown(last_state)

    if idx != -1:
        selection_result = {
            "model_a": model_a,
            "model_b": model_b,
            "winner": "model_a" if idx == 0 else "model_b",
            "time": time.time(),
        }
        st.session_state["selections"].append(selection_result)
        st.session_state["selection_count"] += 1
        st.session_state["last_state"] = f"[Selection #{st.session_state['selection_count']}] You selected Image `#{idx+1}` - Model: {models[idx]}"
        submit_selection(selection_result)
        _advance_to_next_pair()
@st.fragment
def leaderboard():
    """Render the global leaderboard page from the backend's leaderboard data.

    Nothing is drawn when the data could not be fetched (the fetch helper
    reports the error itself).
    """
    payload = get_leaderboard_data()
    if payload is not None:
        st.markdown("## Global Leaderboard")
        explanation = """
This leaderboard shows the performance of different models based on user selections.
- **Elo Rating**: A relative rating system. Higher scores indicate better performance.
- **Win Rate**: The percentage of times a model was chosen when presented.
- **#Selections**: Total number of times this model was presented in a pair.
"""
        st.markdown(explanation)
        st.warning("This leaderboard is just for fun and **does not reflect the actual performance of the models.**")

        # Keep only the displayed columns, in display order.
        shown_columns = ["Model", "Elo Rating", "Win Rate", "#Selections"]
        table = pd.DataFrame(payload["leaderboard"])
        table = table[shown_columns].reset_index(drop=True)
        st.dataframe(table, hide_index=True)
@st.fragment
def my_leaderboard():
    """Render the personal leaderboard built from this session's selections.

    With fewer than 30 selections the page only shows a hint and a CSV
    uploader for restoring previously downloaded selections. An uploaded
    file must parse as CSV and contain the ``model_a``, ``model_b`` and
    ``winner`` columns; it replaces the session's selections. With 30 or
    more selections the page shows per-model Elo rating, win rate and
    selection count, the 20 most recent selections, and a CSV download.
    """
    min_selections = 30
    required_columns = ("model_a", "model_b", "winner")

    records = st.session_state.get("selections", [])
    if len(records) < min_selections:
        st.markdown("Select at least 30 images to see your personal leaderboard")
        uploaded_files = st.file_uploader("Or load your previous selections:", accept_multiple_files=False)
        if uploaded_files:
            try:
                # Rewind defensively in case an earlier run already read
                # this buffer to the end.
                uploaded_files.seek(0)
                logs = pd.read_csv(uploaded_files)
            except ValueError as e:
                # pandas' empty-data and parser errors, as well as decoding
                # errors, are all ValueError subclasses.
                print(e)
                st.error("Could not read the uploaded file as CSV")
                return
            # Drop the index column written by the CSV download of this page.
            if "Unnamed: 0" in logs.columns:
                logs.drop(columns=["Unnamed: 0"], inplace=True)
            missing = [name for name in required_columns if name not in logs.columns]
            if missing:
                st.error(f"Uploaded file is missing required column(s): {', '.join(missing)}")
                return
            loaded = logs.to_dict(orient="records")
            st.session_state["selections"] = loaded
            if len(loaded) >= min_selections:
                st.rerun()
            else:
                # Do not rerun here: the uploader still holds the file, so a
                # rerun would load it and rerun again without end.
                st.warning(
                    f"Loaded {len(loaded)} selections; at least {min_selections} "
                    "are needed to show your personal leaderboard"
                )
        return

    selections = pd.DataFrame(records)
    st.markdown("## Personal Leaderboard")
    st.markdown("""
This leaderboard is based on your personal selections.
- **Elo Rating**: Calculated from your choices. Higher scores indicate models you prefer.
- **Win Rate**: The percentage of times you chose each model when it was presented.
- **#Selections**: Number of times you've seen this model in a pair.
""")

    # Convert once and share the same battle list between the three statistics.
    battles = selections.to_dict('records')
    elo_ratings = compute_elo(battles)
    win_rates = compute_win_rates(battles)
    selection_counts = compute_selection_counts(battles)

    models = set(selections['model_a'].unique()) | set(selections['model_b'].unique())
    data = [
        {
            "Model": model,
            "Elo Rating": round(elo_ratings[model], 2),
            "Win Rate": f"{win_rates[model]*100:.2f}%",
            "#Selections": selection_counts[model],
        }
        for model in models
    ]
    df = pd.DataFrame(data)
    df = df.sort_values("Elo Rating", ascending=False)
    df = df[["Model", "Elo Rating", "Win Rate", "#Selections"]].reset_index(drop=True)
    st.dataframe(df, hide_index=True)

    st.markdown("## Your Recent Selections")
    st.dataframe(selections.tail(20))

    # Download data.
    st.download_button('Download your selection data as CSV', selections.to_csv().encode('utf-8'), "my_selections.csv", "text/csv")
def compute_elo(battles, K=4, SCALE=400, BASE=10, INIT_RATING=1000):
    """Replay battles in order and return the resulting Elo ratings.

    Args:
        battles: iterable of mappings with ``model_a``, ``model_b`` and
            ``winner`` keys. ``winner`` is ``"model_a"`` or ``"model_b"``;
            any other value is scored as a draw.
        K: maximum rating change per battle.
        SCALE: rating difference scale used in the expected-score formula.
        BASE: exponent base used in the expected-score formula.
        INIT_RATING: rating given to a model the first time it is seen.

    Returns:
        A ``defaultdict`` mapping model name to rating; unseen models read
        as ``INIT_RATING``.
    """
    rating = defaultdict(lambda: INIT_RATING)
    for battle in battles:
        first = battle['model_a']
        second = battle['model_b']
        winner = battle['winner']

        first_rating = rating[first]
        second_rating = rating[second]
        # Expected score of each side given the ratings before this battle.
        expected_first = 1 / (1 + BASE ** ((second_rating - first_rating) / SCALE))
        expected_second = 1 / (1 + BASE ** ((first_rating - second_rating) / SCALE))

        # Actual score from the first model's point of view.
        if winner == "model_a":
            score_first = 1
        elif winner == "model_b":
            score_first = 0
        else:
            score_first = 0.5

        rating[first] += K * (score_first - expected_first)
        rating[second] += K * (1 - score_first - expected_second)
    return rating
def compute_win_rates(battles):
    """Return each model's share of won battles.

    Args:
        battles: iterable of mappings with ``model_a``, ``model_b`` and
            ``winner`` keys. A battle counts as a win only for the side
            named by ``winner`` (``"model_a"`` or ``"model_b"``); any other
            value counts as an appearance for both sides and a win for none.

    Returns:
        A dict mapping every model that appeared to ``wins / appearances``.
    """
    wins = defaultdict(int)
    appearances = defaultdict(int)
    for battle in battles:
        first, second, winner = battle['model_a'], battle['model_b'], battle['winner']
        for model in (first, second):
            appearances[model] += 1
        if winner == "model_a":
            wins[first] += 1
        elif winner == "model_b":
            wins[second] += 1

    # Every model with a win has also appeared, so the appearance table
    # already lists all models.
    rates = {}
    for model, played in appearances.items():
        rates[model] = wins[model] / played if played > 0 else 0
    return rates
def compute_selection_counts(battles):
    """Count how many battles each model took part in.

    Args:
        battles: iterable of mappings with ``model_a`` and ``model_b`` keys.

    Returns:
        A ``defaultdict(int)`` mapping model name to its number of
        appearances; models that never appeared read as 0.
    """
    tally = defaultdict(int)
    for battle in battles:
        for side in ('model_a', 'model_b'):
            tally[battle[side]] += 1
    return tally
# Register each view as a page, in menu order, and run whichever page the
# navigation selects.
pages = [st.Page(view) for view in (arena, leaderboard, my_leaderboard)]
st.navigation(pages).run()