Spaces:
Sleeping
Sleeping
File size: 2,077 Bytes
dab7e6b d6824cb fb754b1 d6824cb fb754b1 d6824cb fb754b1 dfa0c95 d6824cb 05beea4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import streamlit as st
from my_model.results.demo import ResultDemonstrator
from my_model.config import evaluation_config as config
def run_demo():
    """
    Render the interactive Streamlit page for browsing model evaluation results.

    The page is split into two columns with a 1:4 width ratio. The first
    column holds the selection widgets; the second shows whatever the
    current selection asks for:

    * "Evaluation Results & Analysis" -> one of the main/ablation results,
      the per-question-category results, or one token-count-vs-score plot per
      entry of ``config.MODEL_CONFIGURATIONS`` (each inside its own expander).
    * "Evaluation Samples" -> three samples, shown only on the run in which
      the "Generate Random Samples" button was pressed.
    """
    # Widget labels that are both offered as options and compared against.
    results_section = "Evaluation Results & Analysis"
    samples_section = 'Evaluation Samples'
    main_results = "Main & Ablation Results"
    per_category_results = "Results per Question Category"
    token_count_impact = "Prompt Length (token count) Impact on Performance"

    demonstrator = ResultDemonstrator()
    controls_col, output_col = st.columns([1, 4])

    with controls_col:
        section_type = st.radio("Select Evaluation Aspect", [results_section, samples_section])

        if section_type == results_section:
            # The analysis selector only exists for the results section and
            # starts on the token-count option (index 2).
            analysis_type = st.radio(
                "Select Type",
                [main_results, per_category_results, token_count_impact],
                index=2,
            )
            if analysis_type == token_count_impact:
                # Extra selectors needed only by the token-count plots.
                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
                score_name = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
        elif section_type == samples_section:
            samples_button = st.button("Generate Random Samples")

    with output_col:
        if section_type == results_section:
            if analysis_type == main_results:
                demonstrator.display_main_results()
            elif analysis_type == per_category_results:
                demonstrator.display_ablation_results_per_question_category()
            elif analysis_type == token_count_impact:
                for conf in config.MODEL_CONFIGURATIONS:
                    with st.expander(conf):
                        demonstrator.plot_token_count_vs_scores(conf, model_name, score_name)
        elif section_type == samples_section and samples_button:
            # `samples_button` is only bound in the samples section; the
            # short-circuit `and` keeps it from being read otherwise.
            demonstrator.show_samples(3)