diff --git a/app.py b/app.py index e7d280998378cc30564cdb6a74eac76e33b1ab81..a0d4bb5a37c308b3b6c0841361b19cfb75c06d72 100644 --- a/app.py +++ b/app.py @@ -2,8 +2,6 @@ import os import pandas as pd import utils import base64 -import shutil -import zipfile from flask import Flask, render_template, request, redirect, url_for from postmarker.core import PostmarkClient from werkzeug.utils import secure_filename @@ -61,7 +59,7 @@ def index(): @app.route('/model/') def model_detail(model_name): - df = pd.read_csv(f'static/figures/{model_name}/cfa_metrics.csv') + df = pd.read_csv(f'static/models_data/{model_name}/cfa_metrics.csv') df = df.round(3) df.insert(0, '#', '') @@ -78,7 +76,21 @@ def model_detail(model_name): classes = 'table table-striped table-bordered' cfa_table_html = df.to_html(classes=classes, escape=False, index=False) - return render_template('model_detail.html', model_name=model_name, cfa_table_html=cfa_table_html) + # Load model specific description + model_detail_file = f'static/models_data/{model_name}/model_detail.html' + if os.path.exists(model_detail_file): + # If the file exists, open and read the HTML file as a string + with open(model_detail_file, 'r', encoding='utf-8') as file: + model_detail = file.read() + else: + model_detail = "

No additional detail is provided for this model.

" + + return render_template( + 'model_detail.html', + model_name=model_name, + model_detail=model_detail, + cfa_table_html=cfa_table_html + ) @app.route('/about') def about(): diff --git a/static/figures/Mistral-7B-Instruct-v0.1/cfa_metrics.csv b/static/models_data/Mistral-7B-Instruct-v0.1/cfa_metrics.csv similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.1/cfa_metrics.csv rename to static/models_data/Mistral-7B-Instruct-v0.1/cfa_metrics.csv diff --git a/static/figures/Mistral-7B-Instruct-v0.1/matrix.svg b/static/models_data/Mistral-7B-Instruct-v0.1/matrix.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.1/matrix.svg rename to static/models_data/Mistral-7B-Instruct-v0.1/matrix.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.1/ranks.svg b/static/models_data/Mistral-7B-Instruct-v0.1/ranks.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.1/ranks.svg rename to static/models_data/Mistral-7B-Instruct-v0.1/ranks.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.1/structure.svg b/static/models_data/Mistral-7B-Instruct-v0.1/structure.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.1/structure.svg rename to static/models_data/Mistral-7B-Instruct-v0.1/structure.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.2/cfa_metrics.csv b/static/models_data/Mistral-7B-Instruct-v0.2/cfa_metrics.csv similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.2/cfa_metrics.csv rename to static/models_data/Mistral-7B-Instruct-v0.2/cfa_metrics.csv diff --git a/static/figures/Mistral-7B-Instruct-v0.2/matrix.svg b/static/models_data/Mistral-7B-Instruct-v0.2/matrix.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.2/matrix.svg rename to static/models_data/Mistral-7B-Instruct-v0.2/matrix.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.2/ranks.svg b/static/models_data/Mistral-7B-Instruct-v0.2/ranks.svg similarity index 100% rename 
from static/figures/Mistral-7B-Instruct-v0.2/ranks.svg rename to static/models_data/Mistral-7B-Instruct-v0.2/ranks.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.2/structure.svg b/static/models_data/Mistral-7B-Instruct-v0.2/structure.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.2/structure.svg rename to static/models_data/Mistral-7B-Instruct-v0.2/structure.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.3/cfa_metrics.csv b/static/models_data/Mistral-7B-Instruct-v0.3/cfa_metrics.csv similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.3/cfa_metrics.csv rename to static/models_data/Mistral-7B-Instruct-v0.3/cfa_metrics.csv diff --git a/static/figures/Mistral-7B-Instruct-v0.3/matrix.svg b/static/models_data/Mistral-7B-Instruct-v0.3/matrix.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.3/matrix.svg rename to static/models_data/Mistral-7B-Instruct-v0.3/matrix.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.3/ranks.svg b/static/models_data/Mistral-7B-Instruct-v0.3/ranks.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.3/ranks.svg rename to static/models_data/Mistral-7B-Instruct-v0.3/ranks.svg diff --git a/static/figures/Mistral-7B-Instruct-v0.3/structure.svg b/static/models_data/Mistral-7B-Instruct-v0.3/structure.svg similarity index 100% rename from static/figures/Mistral-7B-Instruct-v0.3/structure.svg rename to static/models_data/Mistral-7B-Instruct-v0.3/structure.svg diff --git a/static/figures/Mistral-Large-Instruct-2407/cfa_metrics.csv b/static/models_data/Mistral-Large-Instruct-2407/cfa_metrics.csv similarity index 100% rename from static/figures/Mistral-Large-Instruct-2407/cfa_metrics.csv rename to static/models_data/Mistral-Large-Instruct-2407/cfa_metrics.csv diff --git a/static/figures/Mistral-Large-Instruct-2407/matrix.svg b/static/models_data/Mistral-Large-Instruct-2407/matrix.svg similarity index 100% rename from 
static/figures/Mistral-Large-Instruct-2407/matrix.svg rename to static/models_data/Mistral-Large-Instruct-2407/matrix.svg diff --git a/static/models_data/Mistral-Large-Instruct-2407/model_detail.html b/static/models_data/Mistral-Large-Instruct-2407/model_detail.html new file mode 100644 index 0000000000000000000000000000000000000000..d15208d783fc1b04026e02535f3e55150fbdd621 --- /dev/null +++ b/static/models_data/Mistral-Large-Instruct-2407/model_detail.html @@ -0,0 +1 @@ +

This model was released by Mistral AI

\ No newline at end of file diff --git a/static/figures/Mistral-Large-Instruct-2407/ranks.svg b/static/models_data/Mistral-Large-Instruct-2407/ranks.svg similarity index 100% rename from static/figures/Mistral-Large-Instruct-2407/ranks.svg rename to static/models_data/Mistral-Large-Instruct-2407/ranks.svg diff --git a/static/figures/Mistral-Large-Instruct-2407/structure.svg b/static/models_data/Mistral-Large-Instruct-2407/structure.svg similarity index 100% rename from static/figures/Mistral-Large-Instruct-2407/structure.svg rename to static/models_data/Mistral-Large-Instruct-2407/structure.svg diff --git a/static/figures/Mixtral-8x22B-Instruct-v0.1/cfa_metrics.csv b/static/models_data/Mixtral-8x22B-Instruct-v0.1/cfa_metrics.csv similarity index 100% rename from static/figures/Mixtral-8x22B-Instruct-v0.1/cfa_metrics.csv rename to static/models_data/Mixtral-8x22B-Instruct-v0.1/cfa_metrics.csv diff --git a/static/figures/Mixtral-8x22B-Instruct-v0.1/matrix.svg b/static/models_data/Mixtral-8x22B-Instruct-v0.1/matrix.svg similarity index 100% rename from static/figures/Mixtral-8x22B-Instruct-v0.1/matrix.svg rename to static/models_data/Mixtral-8x22B-Instruct-v0.1/matrix.svg diff --git a/static/figures/Mixtral-8x22B-Instruct-v0.1/ranks.svg b/static/models_data/Mixtral-8x22B-Instruct-v0.1/ranks.svg similarity index 100% rename from static/figures/Mixtral-8x22B-Instruct-v0.1/ranks.svg rename to static/models_data/Mixtral-8x22B-Instruct-v0.1/ranks.svg diff --git a/static/figures/Mixtral-8x22B-Instruct-v0.1/structure.svg b/static/models_data/Mixtral-8x22B-Instruct-v0.1/structure.svg similarity index 100% rename from static/figures/Mixtral-8x22B-Instruct-v0.1/structure.svg rename to static/models_data/Mixtral-8x22B-Instruct-v0.1/structure.svg diff --git a/static/figures/Mixtral-8x7B-Instruct-v0.1/cfa_metrics.csv b/static/models_data/Mixtral-8x7B-Instruct-v0.1/cfa_metrics.csv similarity index 100% rename from static/figures/Mixtral-8x7B-Instruct-v0.1/cfa_metrics.csv rename to 
static/models_data/Mixtral-8x7B-Instruct-v0.1/cfa_metrics.csv diff --git a/static/figures/Mixtral-8x7B-Instruct-v0.1/matrix.svg b/static/models_data/Mixtral-8x7B-Instruct-v0.1/matrix.svg similarity index 100% rename from static/figures/Mixtral-8x7B-Instruct-v0.1/matrix.svg rename to static/models_data/Mixtral-8x7B-Instruct-v0.1/matrix.svg diff --git a/static/figures/Mixtral-8x7B-Instruct-v0.1/ranks.svg b/static/models_data/Mixtral-8x7B-Instruct-v0.1/ranks.svg similarity index 100% rename from static/figures/Mixtral-8x7B-Instruct-v0.1/ranks.svg rename to static/models_data/Mixtral-8x7B-Instruct-v0.1/ranks.svg diff --git a/static/figures/Mixtral-8x7B-Instruct-v0.1/structure.svg b/static/models_data/Mixtral-8x7B-Instruct-v0.1/structure.svg similarity index 100% rename from static/figures/Mixtral-8x7B-Instruct-v0.1/structure.svg rename to static/models_data/Mixtral-8x7B-Instruct-v0.1/structure.svg diff --git a/static/figures/Qwen2-72B-Instruct/cfa_metrics.csv b/static/models_data/Qwen2-72B-Instruct/cfa_metrics.csv similarity index 100% rename from static/figures/Qwen2-72B-Instruct/cfa_metrics.csv rename to static/models_data/Qwen2-72B-Instruct/cfa_metrics.csv diff --git a/static/figures/Qwen2-72B-Instruct/matrix.svg b/static/models_data/Qwen2-72B-Instruct/matrix.svg similarity index 100% rename from static/figures/Qwen2-72B-Instruct/matrix.svg rename to static/models_data/Qwen2-72B-Instruct/matrix.svg diff --git a/static/figures/Qwen2-72B-Instruct/ranks.svg b/static/models_data/Qwen2-72B-Instruct/ranks.svg similarity index 100% rename from static/figures/Qwen2-72B-Instruct/ranks.svg rename to static/models_data/Qwen2-72B-Instruct/ranks.svg diff --git a/static/figures/Qwen2-72B-Instruct/structure.svg b/static/models_data/Qwen2-72B-Instruct/structure.svg similarity index 100% rename from static/figures/Qwen2-72B-Instruct/structure.svg rename to static/models_data/Qwen2-72B-Instruct/structure.svg diff --git a/static/figures/Qwen2-7B-Instruct/cfa_metrics.csv 
b/static/models_data/Qwen2-7B-Instruct/cfa_metrics.csv similarity index 100% rename from static/figures/Qwen2-7B-Instruct/cfa_metrics.csv rename to static/models_data/Qwen2-7B-Instruct/cfa_metrics.csv diff --git a/static/figures/Qwen2-7B-Instruct/matrix.svg b/static/models_data/Qwen2-7B-Instruct/matrix.svg similarity index 100% rename from static/figures/Qwen2-7B-Instruct/matrix.svg rename to static/models_data/Qwen2-7B-Instruct/matrix.svg diff --git a/static/figures/Qwen2-7B-Instruct/ranks.svg b/static/models_data/Qwen2-7B-Instruct/ranks.svg similarity index 100% rename from static/figures/Qwen2-7B-Instruct/ranks.svg rename to static/models_data/Qwen2-7B-Instruct/ranks.svg diff --git a/static/figures/Qwen2-7B-Instruct/structure.svg b/static/models_data/Qwen2-7B-Instruct/structure.svg similarity index 100% rename from static/figures/Qwen2-7B-Instruct/structure.svg rename to static/models_data/Qwen2-7B-Instruct/structure.svg diff --git a/static/figures/admin_questionnaire.svg b/static/models_data/admin_questionnaire.svg similarity index 100% rename from static/figures/admin_questionnaire.svg rename to static/models_data/admin_questionnaire.svg diff --git a/static/figures/cardinal.svg b/static/models_data/cardinal.svg similarity index 100% rename from static/figures/cardinal.svg rename to static/models_data/cardinal.svg diff --git a/static/figures/command_r_plus/cfa_metrics.csv b/static/models_data/command_r_plus/cfa_metrics.csv similarity index 100% rename from static/figures/command_r_plus/cfa_metrics.csv rename to static/models_data/command_r_plus/cfa_metrics.csv diff --git a/static/figures/command_r_plus/matrix.svg b/static/models_data/command_r_plus/matrix.svg similarity index 100% rename from static/figures/command_r_plus/matrix.svg rename to static/models_data/command_r_plus/matrix.svg diff --git a/static/figures/command_r_plus/ranks.svg b/static/models_data/command_r_plus/ranks.svg similarity index 100% rename from static/figures/command_r_plus/ranks.svg 
rename to static/models_data/command_r_plus/ranks.svg diff --git a/static/figures/command_r_plus/structure.svg b/static/models_data/command_r_plus/structure.svg similarity index 100% rename from static/figures/command_r_plus/structure.svg rename to static/models_data/command_r_plus/structure.svg diff --git a/static/figures/dummy/cfa_metrics.csv b/static/models_data/dummy/cfa_metrics.csv similarity index 100% rename from static/figures/dummy/cfa_metrics.csv rename to static/models_data/dummy/cfa_metrics.csv diff --git a/static/figures/dummy/matrix.svg b/static/models_data/dummy/matrix.svg similarity index 100% rename from static/figures/dummy/matrix.svg rename to static/models_data/dummy/matrix.svg diff --git a/static/figures/dummy/ranks.svg b/static/models_data/dummy/ranks.svg similarity index 100% rename from static/figures/dummy/ranks.svg rename to static/models_data/dummy/ranks.svg diff --git a/static/figures/dummy/structure.svg b/static/models_data/dummy/structure.svg similarity index 100% rename from static/figures/dummy/structure.svg rename to static/models_data/dummy/structure.svg diff --git a/static/figures/gpt-3.5-turbo-0125/cfa_metrics.csv b/static/models_data/gpt-3.5-turbo-0125/cfa_metrics.csv similarity index 100% rename from static/figures/gpt-3.5-turbo-0125/cfa_metrics.csv rename to static/models_data/gpt-3.5-turbo-0125/cfa_metrics.csv diff --git a/static/figures/gpt-3.5-turbo-0125/matrix.svg b/static/models_data/gpt-3.5-turbo-0125/matrix.svg similarity index 100% rename from static/figures/gpt-3.5-turbo-0125/matrix.svg rename to static/models_data/gpt-3.5-turbo-0125/matrix.svg diff --git a/static/figures/gpt-3.5-turbo-0125/ranks.svg b/static/models_data/gpt-3.5-turbo-0125/ranks.svg similarity index 100% rename from static/figures/gpt-3.5-turbo-0125/ranks.svg rename to static/models_data/gpt-3.5-turbo-0125/ranks.svg diff --git a/static/figures/gpt-3.5-turbo-0125/structure.svg b/static/models_data/gpt-3.5-turbo-0125/structure.svg similarity index 100% 
rename from static/figures/gpt-3.5-turbo-0125/structure.svg rename to static/models_data/gpt-3.5-turbo-0125/structure.svg diff --git a/static/figures/gpt-4o-0513/cfa_metrics.csv b/static/models_data/gpt-4o-0513/cfa_metrics.csv similarity index 100% rename from static/figures/gpt-4o-0513/cfa_metrics.csv rename to static/models_data/gpt-4o-0513/cfa_metrics.csv diff --git a/static/figures/gpt-4o-0513/matrix.svg b/static/models_data/gpt-4o-0513/matrix.svg similarity index 100% rename from static/figures/gpt-4o-0513/matrix.svg rename to static/models_data/gpt-4o-0513/matrix.svg diff --git a/static/figures/gpt-4o-0513/ranks.svg b/static/models_data/gpt-4o-0513/ranks.svg similarity index 100% rename from static/figures/gpt-4o-0513/ranks.svg rename to static/models_data/gpt-4o-0513/ranks.svg diff --git a/static/figures/gpt-4o-0513/structure.svg b/static/models_data/gpt-4o-0513/structure.svg similarity index 100% rename from static/figures/gpt-4o-0513/structure.svg rename to static/models_data/gpt-4o-0513/structure.svg diff --git a/static/figures/gpt-4o-mini-2024-07-18/cfa_metrics.csv b/static/models_data/gpt-4o-mini-2024-07-18/cfa_metrics.csv similarity index 100% rename from static/figures/gpt-4o-mini-2024-07-18/cfa_metrics.csv rename to static/models_data/gpt-4o-mini-2024-07-18/cfa_metrics.csv diff --git a/static/figures/gpt-4o-mini-2024-07-18/matrix.svg b/static/models_data/gpt-4o-mini-2024-07-18/matrix.svg similarity index 100% rename from static/figures/gpt-4o-mini-2024-07-18/matrix.svg rename to static/models_data/gpt-4o-mini-2024-07-18/matrix.svg diff --git a/static/figures/gpt-4o-mini-2024-07-18/ranks.svg b/static/models_data/gpt-4o-mini-2024-07-18/ranks.svg similarity index 100% rename from static/figures/gpt-4o-mini-2024-07-18/ranks.svg rename to static/models_data/gpt-4o-mini-2024-07-18/ranks.svg diff --git a/static/figures/gpt-4o-mini-2024-07-18/structure.svg b/static/models_data/gpt-4o-mini-2024-07-18/structure.svg similarity index 100% rename from 
static/figures/gpt-4o-mini-2024-07-18/structure.svg rename to static/models_data/gpt-4o-mini-2024-07-18/structure.svg diff --git a/static/figures/gpt_no_conv_structure.svg b/static/models_data/gpt_no_conv_structure.svg similarity index 100% rename from static/figures/gpt_no_conv_structure.svg rename to static/models_data/gpt_no_conv_structure.svg diff --git a/static/figures/llama_3.1_405b_instruct_4bit/cfa_metrics.csv b/static/models_data/llama_3.1_405b_instruct_4bit/cfa_metrics.csv similarity index 100% rename from static/figures/llama_3.1_405b_instruct_4bit/cfa_metrics.csv rename to static/models_data/llama_3.1_405b_instruct_4bit/cfa_metrics.csv diff --git a/static/figures/llama_3.1_405b_instruct_4bit/matrix.svg b/static/models_data/llama_3.1_405b_instruct_4bit/matrix.svg similarity index 100% rename from static/figures/llama_3.1_405b_instruct_4bit/matrix.svg rename to static/models_data/llama_3.1_405b_instruct_4bit/matrix.svg diff --git a/static/figures/llama_3.1_405b_instruct_4bit/ranks.svg b/static/models_data/llama_3.1_405b_instruct_4bit/ranks.svg similarity index 100% rename from static/figures/llama_3.1_405b_instruct_4bit/ranks.svg rename to static/models_data/llama_3.1_405b_instruct_4bit/ranks.svg diff --git a/static/figures/llama_3.1_405b_instruct_4bit/structure.svg b/static/models_data/llama_3.1_405b_instruct_4bit/structure.svg similarity index 100% rename from static/figures/llama_3.1_405b_instruct_4bit/structure.svg rename to static/models_data/llama_3.1_405b_instruct_4bit/structure.svg diff --git a/static/figures/llama_3.1_70b_instruct/cfa_metrics.csv b/static/models_data/llama_3.1_70b_instruct/cfa_metrics.csv similarity index 100% rename from static/figures/llama_3.1_70b_instruct/cfa_metrics.csv rename to static/models_data/llama_3.1_70b_instruct/cfa_metrics.csv diff --git a/static/figures/llama_3.1_70b_instruct/matrix.svg b/static/models_data/llama_3.1_70b_instruct/matrix.svg similarity index 100% rename from 
static/figures/llama_3.1_70b_instruct/matrix.svg rename to static/models_data/llama_3.1_70b_instruct/matrix.svg diff --git a/static/figures/llama_3.1_70b_instruct/ranks.svg b/static/models_data/llama_3.1_70b_instruct/ranks.svg similarity index 100% rename from static/figures/llama_3.1_70b_instruct/ranks.svg rename to static/models_data/llama_3.1_70b_instruct/ranks.svg diff --git a/static/figures/llama_3.1_70b_instruct/structure.svg b/static/models_data/llama_3.1_70b_instruct/structure.svg similarity index 100% rename from static/figures/llama_3.1_70b_instruct/structure.svg rename to static/models_data/llama_3.1_70b_instruct/structure.svg diff --git a/static/figures/llama_3.1_8b_instruct/cfa_metrics.csv b/static/models_data/llama_3.1_8b_instruct/cfa_metrics.csv similarity index 100% rename from static/figures/llama_3.1_8b_instruct/cfa_metrics.csv rename to static/models_data/llama_3.1_8b_instruct/cfa_metrics.csv diff --git a/static/figures/llama_3.1_8b_instruct/matrix.svg b/static/models_data/llama_3.1_8b_instruct/matrix.svg similarity index 100% rename from static/figures/llama_3.1_8b_instruct/matrix.svg rename to static/models_data/llama_3.1_8b_instruct/matrix.svg diff --git a/static/figures/llama_3.1_8b_instruct/ranks.svg b/static/models_data/llama_3.1_8b_instruct/ranks.svg similarity index 100% rename from static/figures/llama_3.1_8b_instruct/ranks.svg rename to static/models_data/llama_3.1_8b_instruct/ranks.svg diff --git a/static/figures/llama_3.1_8b_instruct/structure.svg b/static/models_data/llama_3.1_8b_instruct/structure.svg similarity index 100% rename from static/figures/llama_3.1_8b_instruct/structure.svg rename to static/models_data/llama_3.1_8b_instruct/structure.svg diff --git a/static/figures/llama_3_70b_instruct/cfa_metrics.csv b/static/models_data/llama_3_70b_instruct/cfa_metrics.csv similarity index 100% rename from static/figures/llama_3_70b_instruct/cfa_metrics.csv rename to static/models_data/llama_3_70b_instruct/cfa_metrics.csv diff --git 
a/static/figures/llama_3_70b_instruct/matrix.svg b/static/models_data/llama_3_70b_instruct/matrix.svg similarity index 100% rename from static/figures/llama_3_70b_instruct/matrix.svg rename to static/models_data/llama_3_70b_instruct/matrix.svg diff --git a/static/figures/llama_3_70b_instruct/ranks.svg b/static/models_data/llama_3_70b_instruct/ranks.svg similarity index 100% rename from static/figures/llama_3_70b_instruct/ranks.svg rename to static/models_data/llama_3_70b_instruct/ranks.svg diff --git a/static/figures/llama_3_70b_instruct/structure.svg b/static/models_data/llama_3_70b_instruct/structure.svg similarity index 100% rename from static/figures/llama_3_70b_instruct/structure.svg rename to static/models_data/llama_3_70b_instruct/structure.svg diff --git a/static/figures/llama_3_8b_instruct/cfa_metrics.csv b/static/models_data/llama_3_8b_instruct/cfa_metrics.csv similarity index 100% rename from static/figures/llama_3_8b_instruct/cfa_metrics.csv rename to static/models_data/llama_3_8b_instruct/cfa_metrics.csv diff --git a/static/figures/llama_3_8b_instruct/matrix.svg b/static/models_data/llama_3_8b_instruct/matrix.svg similarity index 100% rename from static/figures/llama_3_8b_instruct/matrix.svg rename to static/models_data/llama_3_8b_instruct/matrix.svg diff --git a/static/figures/llama_3_8b_instruct/ranks.svg b/static/models_data/llama_3_8b_instruct/ranks.svg similarity index 100% rename from static/figures/llama_3_8b_instruct/ranks.svg rename to static/models_data/llama_3_8b_instruct/ranks.svg diff --git a/static/figures/llama_3_8b_instruct/structure.svg b/static/models_data/llama_3_8b_instruct/structure.svg similarity index 100% rename from static/figures/llama_3_8b_instruct/structure.svg rename to static/models_data/llama_3_8b_instruct/structure.svg diff --git a/static/figures/ordinal.svg b/static/models_data/ordinal.svg similarity index 100% rename from static/figures/ordinal.svg rename to static/models_data/ordinal.svg diff --git 
a/static/figures/phi-3-medium-128k-instruct/cfa_metrics.csv b/static/models_data/phi-3-medium-128k-instruct/cfa_metrics.csv similarity index 100% rename from static/figures/phi-3-medium-128k-instruct/cfa_metrics.csv rename to static/models_data/phi-3-medium-128k-instruct/cfa_metrics.csv diff --git a/static/figures/phi-3-medium-128k-instruct/matrix.svg b/static/models_data/phi-3-medium-128k-instruct/matrix.svg similarity index 100% rename from static/figures/phi-3-medium-128k-instruct/matrix.svg rename to static/models_data/phi-3-medium-128k-instruct/matrix.svg diff --git a/static/figures/phi-3-medium-128k-instruct/ranks.svg b/static/models_data/phi-3-medium-128k-instruct/ranks.svg similarity index 100% rename from static/figures/phi-3-medium-128k-instruct/ranks.svg rename to static/models_data/phi-3-medium-128k-instruct/ranks.svg diff --git a/static/figures/phi-3-medium-128k-instruct/structure.svg b/static/models_data/phi-3-medium-128k-instruct/structure.svg similarity index 100% rename from static/figures/phi-3-medium-128k-instruct/structure.svg rename to static/models_data/phi-3-medium-128k-instruct/structure.svg diff --git a/static/figures/phi-3-mini-128k-instruct/cfa_metrics.csv b/static/models_data/phi-3-mini-128k-instruct/cfa_metrics.csv similarity index 100% rename from static/figures/phi-3-mini-128k-instruct/cfa_metrics.csv rename to static/models_data/phi-3-mini-128k-instruct/cfa_metrics.csv diff --git a/static/figures/phi-3-mini-128k-instruct/matrix.svg b/static/models_data/phi-3-mini-128k-instruct/matrix.svg similarity index 100% rename from static/figures/phi-3-mini-128k-instruct/matrix.svg rename to static/models_data/phi-3-mini-128k-instruct/matrix.svg diff --git a/static/figures/phi-3-mini-128k-instruct/ranks.svg b/static/models_data/phi-3-mini-128k-instruct/ranks.svg similarity index 100% rename from static/figures/phi-3-mini-128k-instruct/ranks.svg rename to static/models_data/phi-3-mini-128k-instruct/ranks.svg diff --git 
a/static/figures/phi-3-mini-128k-instruct/structure.svg b/static/models_data/phi-3-mini-128k-instruct/structure.svg similarity index 100% rename from static/figures/phi-3-mini-128k-instruct/structure.svg rename to static/models_data/phi-3-mini-128k-instruct/structure.svg diff --git a/static/figures/rank_order_stability_computation.svg b/static/models_data/rank_order_stability_computation.svg similarity index 100% rename from static/figures/rank_order_stability_computation.svg rename to static/models_data/rank_order_stability_computation.svg diff --git a/static/figures/theoretical_structure.svg b/static/models_data/theoretical_structure.svg similarity index 100% rename from static/figures/theoretical_structure.svg rename to static/models_data/theoretical_structure.svg diff --git a/templates/about.html b/templates/about.html index db15f16414226d0ed92b11aacf6fcd8324bb43cb..1ec3149fa2b93ec3e83d929c69140cea4e05309a 100644 --- a/templates/about.html +++ b/templates/about.html @@ -80,6 +80,20 @@ text-align: left; } + .citation-section ul { + margin: auto; /* Center the table */ + margin-top: 20px; + margin-bottom: 10px; + max-width: 1000px; /* Adjust the width as needed */ + text-align: left; + list-style-type: disc; + padding-left: 20px; /* Add padding to indent list items */ + } + + .citation-section ul li { + margin-bottom: 10px; /* Add space between list items */ + } + .section ol,ul { padding-left: 150px; padding-right: 150px; @@ -91,9 +105,10 @@ } .citation-section { - width: 100%; - margin-top: 50px; - text-align: center; + margin-top: 20px; + text-align: left; + max-width: 1000px; + margin: auto; } .citation-box { background-color: #f8f9fa; @@ -101,7 +116,7 @@ border-radius: 8px; padding: 10px; margin-top: 5px; - font-size: 15px; + font-size: 13px; text-align: left; font-family: 'Courier New', Courier, monospace; white-space: pre; @@ -157,6 +172,7 @@ .back-button { text-align: center; margin-top: 50px; + margin-bottom: 30px; } .custom-button { background-color: 
#610b5d; @@ -200,6 +216,7 @@ We adopt the Schwartz Theory of Basic Personal Values, which defines 10 values: Self-Direction, Stimulation, Hedonism, Achievement, Power, Security, Conformity, Tradition, Benevolence, and Universalism. To evaluate their expression we use the associated questionnaires: PVQ-40, and SVS.

+

You can browse the questionnaires, population, and contexts used on our 🤗 StickToYourRole dataset.

The Stick to Your Role! leaderboard aims to provide an up-to-date comparison of recent LLMs based on their ability to coherently simulate populations. It, in tandem with other minimal-context benchmarks, should enable you to choose the best-suited model for your use case! @@ -221,13 +238,13 @@

  • The questionnaire is scored to obtain scores for the 10 personal values
    -
    Contexts
    +
    Context chunks

    We aim to score the expressed value profile for each simulated persona in different contexts. More precisely a population (50 personas) is evaluated with a context chunk (50 topics: one per persona). @@ -237,10 +254,11 @@

    • no_conv : no conversation is simulated; the questions from the PVQ-40 questionnaire are given directly
    • no_conv_svs : no conversation is simulated; the questions from the SVS questionnaire are given directly
    • -
    • chunk_0-chunk-4 : 50 reddit posts used as the initial Interlocutor model messages (one per persona). chunk_0 contains the longest posts, chunk_4 the shortest.
    • +
    • chunk_0-chunk_4 : 50 reddit posts used as the initial Interlocutor model messages (one per persona). chunk_0 contains the longest posts, chunk_4 the shortest.
    • chess : "1. e4" is given as the initial message to all personas, but for each persona the Interlocutor model is instructed to simulate a different persona (instead of a human user)
    • grammar : like chess, but "Can you check this sentence for grammar? \n Whilst Jane was waiting to meet hers friend their nose started bleeding." is given as the initial message.
    +

    You can browse the simulated population, questionnaires, and contexts used on our 🤗 StickToYourRole dataset.

    Validation
    @@ -266,11 +284,11 @@

    @@ -303,15 +321,16 @@ Rank-Order stability (↑) is used to estimate the stability of some value inside a population. In psychology, it is computed as the correlation in the order of individuals at two points in time (individuals are ordered based on their expression of that value). - Intuitively, this can be seen as addressing the following question: "Does Jack always value Tradition more than Jane does?". + Intuitively, this can be seen as addressing the following question: + "Does Jack always (in every context) value Tradition more than Jane does?". As shown below, instead of comparing two points in time, we compare the simulated population in different contexts (simulated conversations of different topics). We then average over different context pairs and values to obtain the final estimate.

    @@ -357,7 +376,6 @@ their expression of that value).
  • in the paper, multiple seeds for the order of suggested answers were used; given that the results didn't vary much between seeds, a single seed was used here, facilitating the analysis with more and longer contexts
  • evaluations were also done without simulating conversations (no_conv setting)
  • evaluations were also done with the SVS questionnaire (in the no_conv setting)
  • -
  • validation metrics - Stress, CFI, SRMR, RMSEA metrics were introduced
  • cardinal and ordinal ordering with sensitivity and diversity estimates were added
  • newer models were evaluated
  • @@ -367,10 +385,23 @@ their expression of that value).

    + Motivation and Methodology page Main page
    -

    If you found this project useful, please cite our related paper:

    +

    If you found this project useful, please cite one of our related papers.

    + +

    Short paper

    +
    +@inproceedings{kovavc2024stick, + title={Stick to your Role! Stability of Personal Values Expressed in Large Language Models}, + author={Kova{\v{c}}, Grgur and Portelas, R{\'e}my and Sawayama, Masataka and Dominey, Peter Ford and Oudeyer, Pierre-Yves}, + booktitle={Proceedings of the Annual Meeting of the Cognitive Science Society}, + volume={46}, + year={2024} +} +
    +

    Long paper

    @article{kovavc2024stick, title={Stick to your role! Stability of personal values expressed in large language models}, @@ -383,6 +414,12 @@ their expression of that value). publisher={Public Library of Science San Francisco, CA USA} }
    +
    diff --git a/templates/index.html b/templates/index.html index c092d0a9edbcd548f81d07d45fef96d8aff69a9c..f37ae357564088677392ace9bdb9da28592c090a 100644 --- a/templates/index.html +++ b/templates/index.html @@ -50,8 +50,15 @@ margin-bottom: 10px; max-width: 1000px; /* Adjust the width as needed */ text-align: left; + list-style-type: disc; + padding-left: 20px; /* Add padding to indent list items */ } + ul li { + margin-bottom: 10px; /* Add space between list items */ + } + + .table-responsive { margin-top: 20px; max-width: 1000px; /* Adjust the width as needed */ @@ -245,17 +252,18 @@ The Stick to Your Role! leaderboard focuses on the stability of simulated personal values during role-playing. We study the coherence of a simulated population. In contrast to evaluating each simulated persona separately, we evaluate personas relative to each other, i.e. as a population. + You can browse the simulated population, questionnaires, and contexts used on our 🤗 StickToYourRole dataset.

    {{ main_table_html|safe }}
    - - Cardinal + + Cardinal - - Ordinal + + Ordinal

    @@ -269,7 +277,6 @@ Refer here or to our paper for more details.

    - In addition to Rank-Order stability we compute validity metrics (Stress, CFI, SRMR, RMSEA), which are a common practice in psychology. Validity refers to the extent to which the questionnaire measures what it purports to measure. It can be seen as the questionnaire's accuracy in measuring the intended factors, i.e. values. @@ -283,14 +290,41 @@

    To sum up here are the metrics used:

    @@ -298,17 +332,26 @@ {{ full_table_html|safe }}
    - Learn More About This Project -
    -
    + Motivation and Methodology page Submit a model

    - If you found this project useful, please cite our related paper, + If you found this project useful, please cite one of our related papers, which this leaderboard extends with a more focused and elaborate experimental setup. Refer here for details.

    +

    Short paper

    +
    +@inproceedings{kovavc2024stick, + title={Stick to your Role! Stability of Personal Values Expressed in Large Language Models}, + author={Kova{\v{c}}, Grgur and Portelas, R{\'e}my and Sawayama, Masataka and Dominey, Peter Ford and Oudeyer, Pierre-Yves}, + booktitle={Proceedings of the Annual Meeting of the Cognitive Science Society}, + volume={46}, + year={2024} +} +
    +

    Long paper

    @article{kovavc2024stick, title={Stick to your role! Stability of personal values expressed in large language models}, diff --git a/templates/model_detail.html b/templates/model_detail.html index 17da43c9f033ed2384855598650ae91cbed3d205..456638f5169f0c714dce8d60db7d18d2450d5ee3 100644 --- a/templates/model_detail.html +++ b/templates/model_detail.html @@ -23,7 +23,6 @@ box-shadow: 0 4px 8px rgba(0,0,0,0.1); } h1 { - color: #333; text-align: center; } .model-name { @@ -144,15 +143,15 @@ font-size: 14px; } - .image-section { + .section { text-align: center; margin-top: 40px; } - .image-section h2 { + .section h2 { font-size: 30px; margin-bottom: 20px; } - .image-section p { + .section p { margin: auto; padding-left: 150px; padding-right: 150px; @@ -204,22 +203,52 @@

    Stick To Your Role! Leaderboard

    Model: {{ model_name }}
    -
    +
    +

    Model details

    + {{ model_detail|safe }} + +
    +
    +

    Detailed results

    +

    + Below we show detailed results and visualizations for each metric in each context chunk. + We are scoring the expressed values of a simulated participant in a context. + The population is simulated 9 times, once for each context chunk. + A context chunk is a set of 50 contexts - one context for each individual. + For instance, chunks_0-4 contain reddit posts (longest in chunk_0, shortest in chunk_4). + When comparing chunk_0 and chunk_4, the conversations with the participants are initialized first with posts from chunk_0 and then with posts from chunk_4. + Metrics and chunks are explained in more detail on the Motivation and Methodology page. +

    + +
    +

    Structure

    This image shows the circular value structure projected on a 2D plane. - This was done by computing the intercorrelations between different values this space was then reduces with a SVD based approach and varimax rotation (`FactorAnalysis` object from `scikit-learn`). - The theoretical order is shown in the top left figure. - The distance is computed as the average distance of each value to it's rank in the theoretical order. - The minimal distance with the theoretical order in the clockwise and counter-clockwise direction was taken as the final distance. + This was done by computing the intercorrelations between different values; this space was then reduced with an SVD-based approach and varimax rotation (`FactorAnalysis` object from `scikit-learn`). + The theoretical order (shown in the top left figure) was used to initialize the SVD. + Stress denotes the fit quality. + +

    -
    +
    +

    Confirmatory Factor Analysis metrics

    +

    + This table shows the metrics resulting from the Magnifying class CFA procedure: + for each context chunk four CFA models are fit (one for each high level value). + The average of the metrics for those four CFA models is shown for each context chunk. +

    +
    + {{ cfa_table_html|safe }} +
    +
    +

    Pairwise Rank-Order stability

    This image shows the Rank-Order stability between each pair of context chunks. @@ -229,23 +258,12 @@ Refer to our paper for details.

    - -

    Confirmatory Factor Analysis metrics

    -

    - This tables show the metrics resulting from the Magnifying class CFA procedure: - for each context chunk four CFA models are fit (one for each high level value). - The average of the metrics for those four CFA models are shown for each context chunk. -

    -
    - {{ cfa_table_html|safe }} -
    -
    -
    +

    Visualizing the order of simulated personas

    This image shows the order of personas in each context chunk for each value. @@ -253,8 +271,8 @@ Therefore, the Rank-Order stability between the `no_conv` chunk and some chunk corresponds to the extent to which the curve is increasing in that chunk.