KB-VQA-E / my_model /tabs /results.py
m7mdal7aj's picture
Update my_model/tabs/results.py
6d94c7b verified
import streamlit as st
from my_model.results.demo import ResultDemonstrator
from my_model.config import evaluation_config as config
def run_demo() -> None:
    """
    Render the evaluation-results page of the Streamlit application.

    The page is split into two columns with a 1:4 width ratio. The first
    column holds the selection widgets; the second column shows the output
    that corresponds to the current selection:

    * "Evaluation Results & Analysis" offers three analysis types (the
      prompt-length analysis is preselected via ``index=2``). The
      prompt-length analysis additionally asks for a model size and a score
      type, then draws one plot per entry of ``config.MODEL_CONFIGURATIONS``,
      each inside its own expander.
    * "Evaluation Samples" offers a button; when it is pressed, three
      samples are shown.

    Returns:
        None
    """

    # Option labels are compared against the widgets' return values, so each
    # one is spelled out exactly once here.
    results_section = "Evaluation Results & Analysis"
    samples_section = "Evaluation Samples"
    main_results = "Main & Ablation Results"
    per_category = "Results per Question Category"
    prompt_length = "Prompt Length (token count) Impact on Performance"

    demonstrator = ResultDemonstrator()
    controls_col, output_col = st.columns([1, 4])

    # Selections that only exist for some menu paths; they are read below
    # only on the path that assigned them.
    chosen_analysis = None
    chosen_model = None
    chosen_score = None
    generate_clicked = False

    # --- Selection widgets -------------------------------------------------
    with controls_col:
        chosen_section = st.radio(
            "Select Evaluation Aspect",
            [results_section, samples_section],
        )

        if chosen_section == results_section:
            chosen_analysis = st.radio(
                "Select Type",
                [main_results, per_category, prompt_length],
                index=2,
            )
            # The extra model/score selectors are only relevant to the
            # prompt-length analysis.
            if chosen_analysis == prompt_length:
                chosen_model = st.radio("Select Model Size", config.MODEL_NAMES)
                chosen_score = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
        elif chosen_section == samples_section:
            generate_clicked = st.button("Generate Random Samples")

    # --- Output for the current selection ----------------------------------
    with output_col:
        if chosen_section == samples_section:
            if generate_clicked:
                demonstrator.show_samples(3)
        elif chosen_section == results_section:
            if chosen_analysis == main_results:
                demonstrator.display_main_results()
            elif chosen_analysis == per_category:
                demonstrator.display_ablation_results_per_question_category()
            elif chosen_analysis == prompt_length:
                # One collapsible plot per model configuration.
                for configuration in config.MODEL_CONFIGURATIONS:
                    with st.expander(configuration):
                        demonstrator.plot_token_count_vs_scores(
                            configuration, chosen_model, chosen_score
                        )