Spaces:
Sleeping
Sleeping
import streamlit as st | |
from my_model.results.demo import ResultDemonstrator | |
from my_model.config import evaluation_config as config | |
def run_demo():
    """
    Render the Streamlit page used to browse model evaluation results.

    The page is laid out as two columns with relative widths 1:4. The narrow
    column holds the selection widgets; the wide column shows whatever those
    selections ask for, delegating the rendering to a ``ResultDemonstrator``.
    """
    # Option labels are compared against several times below, so name them once.
    results_section = "Evaluation Results & Analysis"
    samples_section = 'Evaluation Samples'
    main_results = "Main & Ablation Results"
    per_category = "Results per Question Category"
    token_impact = "Prompt Length (token count) Impact on Performance"

    # First Streamlit call made by this function.
    st.set_page_config(
        page_title="Model Evaluation Results and Analyses",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    demonstrator = ResultDemonstrator()
    controls_pane, output_pane = st.columns([1, 4])

    # --- Left column: selection widgets -------------------------------------
    with controls_pane:
        chosen_section = st.radio("Select Evaluation Aspect", [results_section, samples_section])

        if chosen_section == samples_section:
            want_samples = st.button("Generate Random Samples")
        elif chosen_section == results_section:
            # index=2 preselects the prompt-length analysis.
            chosen_analysis = st.radio(
                "Select Type",
                [main_results, per_category, token_impact],
                index=2,
            )
            # Model and score selectors only exist for the prompt-length analysis.
            if chosen_analysis == token_impact:
                chosen_model = st.radio("Select Model Size", config.MODEL_NAMES)
                chosen_score = st.radio("Select Score Type", ["VQA Score", "Exact Match"])

    # --- Right column: output for the current selection ---------------------
    with output_pane:
        if chosen_section == samples_section:
            # Samples are shown only on the run in which the button was clicked.
            if want_samples:
                demonstrator.show_samples(3)
        elif chosen_section == results_section:
            if chosen_analysis == main_results:
                demonstrator.display_main_results()
            elif chosen_analysis == per_category:
                demonstrator.display_ablation_results_per_question_category()
            elif chosen_analysis == token_impact:
                # One expander per model configuration, each holding its own plot.
                for configuration in config.MODEL_CONFIGURATIONS:
                    with st.expander(configuration):
                        demonstrator.plot_token_count_vs_scores(configuration, chosen_model, chosen_score)