grg committed on
Commit
215d189
1 Parent(s): 40c29ba

Adding submit model instructions and route

Browse files
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  .idea/
2
  __pycache__/*
3
- copy_data.sh
 
 
1
  .idea/
2
  __pycache__/*
3
+ copy_data.sh
4
+ uploads/*
app.py CHANGED
@@ -1,9 +1,21 @@
1
- from flask import Flask, render_template
2
  import pandas as pd
3
  import utils
 
 
 
 
 
 
4
 
5
  app = Flask(__name__)
 
 
 
6
 
 
 
 
7
 
8
  @app.route('/')
9
  def index():
@@ -55,5 +67,66 @@ def model_detail(model_name):
55
  def about():
56
  return render_template('about.html')
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  if __name__ == '__main__':
59
  app.run(host='0.0.0.0', port=7860, debug=True)
 
1
+ import os
2
  import pandas as pd
3
  import utils
4
+ import base64
5
+ import shutil
6
+ import zipfile
7
+ from flask import Flask, render_template, request, redirect, url_for
8
+ from postmarker.core import PostmarkClient
9
+ from werkzeug.utils import secure_filename
10
 
11
  app = Flask(__name__)
12
+ app.config['UPLOAD_FOLDER'] = 'uploads' # Directory where files will be stored
13
+ os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
14
+ app.config['ALLOWED_EXTENSIONS'] = {'zip'}
15
 
16
+ def allowed_file(filename):
17
+ return '.' in filename and \
18
+ filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']
19
 
20
  @app.route('/')
21
  def index():
 
67
  def about():
68
  return render_template('about.html')
69
 
70
+ @app.route('/new_model')
71
+ def new_model():
72
+ return render_template('new_model.html')
73
+
74
+ @app.route('/model_submitted')
75
+ def model_submitted():
76
+ return render_template('model_submitted.html')
77
+
78
+ @app.route('/failed_submission')
79
+ def failed_submission():
80
+ return render_template('failed_submission.html')
81
+
82
+
83
+ @app.route('/submit_model', methods=['POST'])
84
+ def submit_model():
85
+ model_name = request.form['model_name']
86
+ pull_request_link = request.form['pull_request_link']
87
+ email = request.form['email']
88
+ description = request.form['description']
89
+
90
+ # Handle ZIP file upload
91
+ if 'model_files' not in request.files:
92
+ return redirect(url_for('failed_submission'))
93
+
94
+ file = request.files['model_files']
95
+
96
+ if file and allowed_file(file.filename):
97
+ filename = secure_filename(file.filename)
98
+ file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
99
+ file.save(file_path)
100
+
101
+ # Read the file content and encode it in base64
102
+ with open(file_path, 'rb') as f:
103
+ file_content = base64.b64encode(f.read()).decode('ascii')
104
+
105
+ # Set up Postmark email client
106
+ postmark = PostmarkClient(server_token=os.getenv('POSTMARK_SERVER_API'))
107
+
108
+ # Send the email with the attachment
109
+ postmark.emails.send(
110
+ From='grgur.kovac@inria.fr',
111
+ To='grgur.kovac@inria.fr',
112
+ Subject=f'Stick to Your Role! Model Submission: {model_name}',
113
+ HtmlBody=f"""
114
+ <p><strong>Model Name:</strong> {model_name}</p>
115
+ <p><strong>Pull Request Link:</strong> {pull_request_link}</p>
116
+ <p><strong>Email:</strong> {email}</p>
117
+ <p><strong>Description:</strong> {description}</p>
118
+ """,
119
+ Attachments=[{
120
+ 'Name': filename,
121
+ 'Content': file_content,
122
+ 'ContentType': 'application/zip'
123
+ }]
124
+ )
125
+ else:
126
+ return redirect(url_for('failed_submission'))
127
+
128
+ return redirect(url_for('model_submitted'))
129
+
130
+
131
  if __name__ == '__main__':
132
  app.run(host='0.0.0.0', port=7860, debug=True)
static/figures/cardinal.svg CHANGED
static/figures/ordinal.svg CHANGED
static/leaderboard.csv CHANGED
@@ -1,18 +1,20 @@
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,Separability,CFI,SRMR,RMSEA
2
- phi-3-mini-128k-instruct,0.3387345679012346,0.4571976280473622,0.039299993295009855,0.281800547806919,0.963768115942029,0.7509527777777777,0.25489166666666674,0.22045000000000003
3
- phi-3-medium-128k-instruct,0.35108024691358025,0.46871557360419164,0.09692037989916814,0.2651981204439735,0.9975845410628019,0.6727694444444445,0.2984500000000001,0.2759472222222221
4
- Mistral-7B-Instruct-v0.1,0.20679012345679013,0.38323622857524176,0.027216280472015988,0.2829498135031582,0.995169082125604,0.500288888888889,0.45314444444444446,0.4191027777777777
5
- Mistral-7B-Instruct-v0.2,0.39814814814814814,0.4692343788574553,0.14417876497818388,0.265188983528973,1.0,0.5787944444444445,0.35010277777777776,0.3171083333333333
6
- Mistral-7B-Instruct-v0.3,0.2824074074074074,0.4168826678339619,0.07960539866974455,0.2742399030139009,0.9975845410628019,0.5231444444444444,0.4214972222222223,0.3914694444444443
7
- Mixtral-8x7B-Instruct-v0.1,0.4930555555555556,0.5307045793457128,0.21473356319081474,0.2624402608740656,1.0,0.6766166666666665,0.25611666666666666,0.24065277777777772
8
- Mixtral-8x22B-Instruct-v0.1,0.2924382716049383,0.41811429894732177,0.1414001940345544,0.2548838005881672,0.9654589371980676,0.45902777777777776,0.4849916666666666,0.4871833333333333
9
- command_r_plus,0.5879629629629629,0.6136142726835458,0.3429686514651868,0.23811982320641845,0.963768115942029,0.7772111111111112,0.17755277777777778,0.17465277777777777
10
- llama_3_8b_instruct,0.5007716049382716,0.5571604188191388,0.24527785038654715,0.245806400289881,0.961352657004831,0.7348277777777779,0.20952222222222228,0.20751944444444437
11
- llama_3_70b_instruct,0.7376543209876543,0.7573878472446817,0.607020698814379,0.18525883672204868,1.0,0.8298166666666668,0.10965277777777771,0.14649722222222217
12
- llama_3.1_8b_instruct,0.5671296296296297,0.6056589663453942,0.4295080949846363,0.22060228669473025,0.9710144927536233,0.6379333333333334,0.3225500000000001,0.3328972222222223
13
- llama_3.1_70b_instruct,0.7739197530864198,0.78874072958529,0.691365862744007,0.1709718847084183,0.9944444444444444,0.8203805555555554,0.14023055555555552,0.17041944444444446
14
- Qwen2-7B-Instruct,0.4529320987654321,0.5256131964101429,0.25108519506513916,0.25776537005719313,0.9855072463768116,0.6248583333333334,0.32358611111111113,0.3028361111111111
15
- Qwen2-72B-Instruct,0.6080246913580247,0.6858608495773215,0.6465993243020925,0.20297742879025626,0.9833333333333333,0.5559722222222221,0.3575638888888889,0.39241388888888884
16
- gpt-3.5-turbo-0125,0.23842592592592593,0.4028828123262879,0.08240359836763214,0.28728574920060357,1.0,0.4998916666666666,0.47583055555555553,0.4404444444444445
17
- gpt-4o-0513,0.7229938271604939,0.707844597747704,0.5122163952167618,0.19201420113771173,1.0,0.7998694444444445,0.14606111111111109,0.1400583333333334
18
- dummy,0.14814814814814814,0.3585809973377891,-0.009004148398032956,0.2928877637010999,1.0,0.5076361111111111,0.4973388888888889,0.4541638888888889
 
 
 
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,Separability,CFI,SRMR,RMSEA
2
+ phi-3-mini-128k-instruct,0.32853223593964337,0.4571976280473622,0.039299993295009855,0.281800547806919,0.963768115942029,0.7509527777777777,0.25489166666666674,0.22045000000000003
3
+ phi-3-medium-128k-instruct,0.34224965706447186,0.46871557360419164,0.09692037989916814,0.2651981204439735,0.9975845410628019,0.6727694444444445,0.2984500000000001,0.2759472222222221
4
+ Mistral-7B-Instruct-v0.1,0.19958847736625512,0.38323622857524176,0.027216280472015988,0.2829498135031582,0.995169082125604,0.500288888888889,0.45314444444444446,0.4191027777777777
5
+ Mistral-7B-Instruct-v0.2,0.38545953360768176,0.4692343788574553,0.14417876497818388,0.265188983528973,1.0,0.5787944444444445,0.35010277777777776,0.3171083333333333
6
+ Mistral-7B-Instruct-v0.3,0.2702331961591221,0.4168826678339619,0.07960539866974455,0.2742399030139009,0.9975845410628019,0.5231444444444444,0.4214972222222223,0.3914694444444443
7
+ Mixtral-8x7B-Instruct-v0.1,0.4746227709190672,0.5307045793457128,0.21473356319081474,0.2624402608740656,1.0,0.6766166666666665,0.25611666666666666,0.24065277777777772
8
+ Mixtral-8x22B-Instruct-v0.1,0.2791495198902606,0.41811429894732177,0.1414001940345544,0.2548838005881672,0.9654589371980676,0.45902777777777776,0.4849916666666666,0.4871833333333333
9
+ command_r_plus,0.5761316872427983,0.6136142726835458,0.3429686514651868,0.23811982320641845,0.963768115942029,0.7772111111111112,0.17755277777777778,0.17465277777777777
10
+ llama_3_8b_instruct,0.49108367626886146,0.5571604188191388,0.24527785038654715,0.245806400289881,0.961352657004831,0.7348277777777779,0.20952222222222228,0.20751944444444437
11
+ llama_3_70b_instruct,0.718792866941015,0.7573878472446817,0.607020698814379,0.18525883672204868,1.0,0.8298166666666668,0.10965277777777771,0.14649722222222217
12
+ llama_3.1_8b_instruct,0.5521262002743484,0.6056589663453942,0.4295080949846363,0.22060228669473025,0.9710144927536233,0.6379333333333334,0.3225500000000001,0.3328972222222223
13
+ llama_3.1_70b_instruct,0.7517146776406035,0.78874072958529,0.691365862744007,0.1709718847084183,0.9944444444444444,0.8203805555555554,0.14023055555555552,0.17041944444444446
14
+ Qwen2-7B-Instruct,0.4465020576131687,0.5256131964101429,0.25108519506513916,0.25776537005719313,0.9855072463768116,0.6248583333333334,0.32358611111111113,0.3028361111111111
15
+ Qwen2-72B-Instruct,0.5802469135802469,0.6858608495773215,0.6465993243020925,0.20297742879025626,0.9833333333333333,0.5559722222222221,0.3575638888888889,0.39241388888888884
16
+ gpt-3.5-turbo-0125,0.22565157750342937,0.4028828123262879,0.08240359836763214,0.28728574920060357,1.0,0.4998916666666666,0.47583055555555553,0.4404444444444445
17
+ gpt-4o-0513,0.705761316872428,0.707844597747704,0.5122163952167618,0.19201420113771173,1.0,0.7998694444444445,0.14606111111111109,0.1400583333333334
18
+ gpt-4o-mini-2024-07-18,0.37517146776406035,0.4740062039155729,0.13575309046266867,0.2707065266105181,1.0,0.6141777777777777,0.32648055555555555,0.29394722222222214
19
+ Mistral-Large-Instruct-2407,0.7613168724279836,0.8046038845509005,0.7644582301049158,0.16944638941325085,0.994806763285024,0.7604888888888888,0.18767499999999993,0.21457222222222228
20
+ dummy,0.14609053497942384,0.3585809973377891,-0.009004148398032956,0.2928877637010999,1.0,0.5076361111111111,0.4973388888888889,0.4541638888888889
templates/about.html CHANGED
@@ -349,7 +349,7 @@ their expression of that value).
349
  </p>
350
  </div>
351
  <div class="back-button">
352
- <a href="{{ url_for('index') }}" class="custom-button mt-3">Back</a>
353
  </div>
354
  <div class="citation-section">
355
  <p>If you found this project useful, please cite our related paper:</p>
 
349
  </p>
350
  </div>
351
  <div class="back-button">
352
+ <a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
353
  </div>
354
  <div class="citation-section">
355
  <p>If you found this project useful, please cite our related paper:</p>
templates/failed_submission.html ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Stick To Your Role! About</title>
7
+ <!-- Include Bootstrap CSS for styling -->
8
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/css/bootstrap.min.css">
9
+ <!-- Include DataTables CSS -->
10
+ <link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/dataTables.bootstrap5.min.css">
11
+ <!-- Custom CSS for additional styling -->
12
+ <style>
13
+ body {
14
+ background-color: #f8f9fa;
15
+ font-family: 'Arial', sans-serif;
16
+ }
17
+ .container {
18
+ max-width: 1200px; /* Limit the width of the container */
19
+ margin: auto; /* Center the container */
20
+ padding: 20px; /* Add some padding */
21
+ background: #fff;
22
+ border-radius: 8px;
23
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
24
+ }
25
+ h1 {
26
+ color: #333;
27
+ text-align: center;
28
+ }
29
+ h2 {
30
+ color: #333;
31
+ margin-top: 30px;
32
+ text-align: center;
33
+ }
34
+ .table-responsive {
35
+ margin-top: 20px;
36
+ }
37
+ table {
38
+ border-collapse: separate;
39
+ border-spacing: 0;
40
+ font-size: 14px; /* Reduce the font size */
41
+ width: 100%;
42
+ border: none; /* Remove any default border */
43
+ }
44
+ table thead th {
45
+ background-color: #610b5d;
46
+ color: white;
47
+ border: 1px solid #dee2e6;
48
+ text-align: left;
49
+ }
50
+ table tbody tr {
51
+ background-color: #fff;
52
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
53
+ }
54
+ table tbody tr:hover {
55
+ background-color: #f1f1f1;
56
+ }
57
+ table td, table th {
58
+ padding: 10px; /* Reduce padding */
59
+ border: 1px solid #dee2e6;
60
+ }
61
+ table th:first-child {
62
+ border-top-left-radius: 10px;
63
+ }
64
+ table th:last-child {
65
+ border-top-right-radius: 10px;
66
+ }
67
+ .section{
68
+ padding-top: 19px;
69
+ text-align: left;
70
+ }
71
+
72
+ .section p {
73
+ padding-left: 150px;
74
+ padding-right: 150px;
75
+ text-indent: 2em;
76
+ margin: auto;
77
+ margin-bottom: 10px;
78
+ text-align: left;
79
+ }
80
+
81
+ .section ol, ul {
82
+ padding-left: 150px;
83
+ padding-right: 150px;
84
+ margin: auto;
85
+ margin-bottom: 20px;
86
+ margin-left: 50px;
87
+ text-align: left;
88
+ margin-top: 0px;
89
+ }
90
+
91
+ .citation-section {
92
+ width: 100%;
93
+ margin-top: 50px;
94
+ text-align: center;
95
+ }
96
+ .citation-box {
97
+ background-color: #f8f9fa;
98
+ border: 1px solid #dee2e6;
99
+ border-radius: 8px;
100
+ padding: 10px;
101
+ margin-top: 5px;
102
+ font-size: 15px;
103
+ text-align: left;
104
+ font-family: 'Courier New', Courier, monospace;
105
+ white-space: pre;
106
+ }
107
+
108
+ .image-container-structure {
109
+ display: flex;
110
+ justify-content: center;
111
+ gap: 10px;
112
+ margin-bottom: 40px;
113
+ max-width: 70%; /* Adjust the width as needed */
114
+ margin: auto;
115
+ }
116
+
117
+ .image-container-structure a {
118
+ flex: 1;
119
+ }
120
+
121
+ .image-container-structure img {
122
+ max-width: 100%;
123
+ height: auto;
124
+ display: block;
125
+ margin: auto;
126
+ }
127
+
128
+ .image-container {
129
+ width: 100%;
130
+ margin-bottom: 40px;
131
+ }
132
+ .image-container #admin-questionnaire {
133
+ width: 50%;
134
+ height: auto;
135
+ display: block;
136
+ margin: auto;
137
+ }
138
+ .image-container #ro-image {
139
+ width: 70%;
140
+ height: auto;
141
+ display: block;
142
+ margin: auto;
143
+ }
144
+
145
+ .section-title {
146
+ font-size: 24px;
147
+ font-weight: bold;
148
+ text-align: center;
149
+ margin-bottom: 40px;
150
+ padding: 20px; /* Add padding for more margin around text */
151
+ background-color: #610b5d;
152
+ color: #fff; /* Ensure text is readable on dark background */
153
+ border-radius: 15px; /* Rounded edges */
154
+ }
155
+ .back-button {
156
+ text-align: center;
157
+ margin-top: 50px;
158
+ }
159
+ .custom-button {
160
+ background-color: #610b5d;
161
+ color: #fff; /* Set white text color */
162
+ border-radius: 15px; /* Rounded edges */
163
+ padding: 10px 20px; /* Padding for the button */
164
+ font-size: 18px; /* Increase font size */
165
+ text-decoration: none; /* Remove underline */
166
+ }
167
+ .custom-button:hover {
168
+ background-color: #812b7d;
169
+ color: #fff;
170
+ }
171
+ </style>
172
+ </head>
173
+ <body>
174
+ <div class="container">
175
+ <h1 class="mt-5">Stick To Your Role! Leaderboard</h1>
176
+ <div class="table-responsive">
177
+ <!-- Render the table HTML here -->
178
+ {{ table_html|safe }}
179
+ </div>
180
+ <div class="section">
181
+ <div class="section-title">There was an issue with your submission.</div>
182
+ <p>
183
+ Try again or contact us at <a href="mailto:grgur.kovac@inria.fr">grgur.kovac@inria.fr</a>.
184
+ </p>
185
+ <div class="back-button">
186
+ <a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ </div>
191
+
192
+ <!-- Include jQuery -->
193
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
194
+ <!-- Include Bootstrap JS -->
195
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
196
+ <!-- Include DataTables JS -->
197
+ <script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
198
+ <script src="https://cdn.datatables.net/1.11.5/js/dataTables.bootstrap5.min.js"></script>
199
+ <!-- Initialize DataTables -->
200
+ <script>
201
+ $(document).ready(function() {
202
+ const table = $('table').DataTable({
203
+ "paging": false,
204
+ "info": false,
205
+ "columnDefs": [
206
+ { "orderable": false, "targets": 0 },
207
+ { "searchable": false, "targets": 0 }
208
+ ],
209
+ "order": [[ 2, 'desc' ]],
210
+ "drawCallback": function(settings) {
211
+ var api = this.api();
212
+ api.column(0, {order:'applied'}).nodes().each(function(cell, i) {
213
+ cell.innerHTML = i + 1;
214
+ });
215
+ }
216
+ });
217
+ });
218
+
219
+ </script>
220
+ </body>
221
+ </html>
templates/index.html CHANGED
@@ -41,6 +41,14 @@
41
  text-align: left;
42
  }
43
 
 
 
 
 
 
 
 
 
44
  .table-responsive {
45
  margin-top: 20px;
46
  max-width: 1000px; /* Adjust the width as needed */
@@ -195,8 +203,8 @@
195
  As proposed in our <a href="https://arxiv.org/abs/2402.14846">paper</a>,
196
  unwanted context-dependence should be seen as a <b>property of LLMs</b> - a dimension of LLM comparison (alongside others such as model size, speed, or expressed knowledge).
197
  This leaderboard aims to provide such a comparison and extends our paper with a more focused and elaborate experimental setup.
198
- Standard benchmarks present MANY questions from the SAME MINIMAL contexts (e.g. multiple choice questions),
199
- we present SAME questions from MANY different contexts.
200
  </p>
201
  <div class="table-responsive main-table">
202
  <!-- Render the table HTML here -->
@@ -238,6 +246,9 @@
238
  <div class="about-button">
239
  <a href="{{ url_for('about') }}" class="custom-button mt-3">Learn More About This Project</a>
240
  </div>
 
 
 
241
  <div class="citation-section">
242
  <p>
243
  If you found this project useful, please cite our related paper,
@@ -253,6 +264,12 @@
253
  }
254
  </div>
255
  </div>
 
 
 
 
 
 
256
  </div>
257
 
258
  <!-- Include jQuery -->
 
41
  text-align: left;
42
  }
43
 
44
+ ul {
45
+ margin: auto; /* Center the table */
46
+ margin-top: 20px;
47
+ margin-bottom: 10px;
48
+ max-width: 1000px; /* Adjust the width as needed */
49
+ text-align: left;
50
+ }
51
+
52
  .table-responsive {
53
  margin-top: 20px;
54
  max-width: 1000px; /* Adjust the width as needed */
 
203
  As proposed in our <a href="https://arxiv.org/abs/2402.14846">paper</a>,
204
  unwanted context-dependence should be seen as a <b>property of LLMs</b> - a dimension of LLM comparison (alongside others such as model size, speed, or expressed knowledge).
205
  This leaderboard aims to provide such a comparison and extends our paper with a more focused and elaborate experimental setup.
206
+ Standard benchmarks present <b>MANY</b> questions from the <b>SAME MINIMAL contexts</b> (e.g. multiple choice questions),
207
+ we present <b>SAME</b> questions from <b>MANY different contexts</b>.
208
  </p>
209
  <div class="table-responsive main-table">
210
  <!-- Render the table HTML here -->
 
246
  <div class="about-button">
247
  <a href="{{ url_for('about') }}" class="custom-button mt-3">Learn More About This Project</a>
248
  </div>
249
+ <div class="about-button">
250
+ <a href="{{ url_for('new_model') }}" class="custom-button mt-3">Submit a model</a>
251
+ </div>
252
  <div class="citation-section">
253
  <p>
254
  If you found this project useful, please cite our related paper,
 
264
  }
265
  </div>
266
  </div>
267
+ <ul>
268
+ <li>Contact: <a href="mailto:grgur.kovac@inria.fr">grgur.kovac@inria.fr</a></li>
269
+ <li>See the <a href="https://sites.google.com/view/llmvaluestability">Project website</a></li>
270
+ <li>See the Flowers team <a href="http://developmentalsystems.org">blog</a> and <a href="https://flowers.inria.fr/">website</a></li>
271
+ <li>See Grgur's website and other projects: <a href="https://grgkovac.github.io/">https://grgkovac.github.io/</a></li>
272
+ </ul>
273
  </div>
274
 
275
  <!-- Include jQuery -->
templates/model_detail.html CHANGED
@@ -140,7 +140,7 @@
140
  </div>
141
  </div>
142
  <div class="back-button">
143
- <a href="{{ url_for('index') }}" class="custom-button mt-3">Back</a>
144
  </div>
145
  </div>
146
 
 
140
  </div>
141
  </div>
142
  <div class="back-button">
143
+ <a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
144
  </div>
145
  </div>
146
 
templates/model_submitted.html ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Stick To Your Role! About</title>
7
+ <!-- Include Bootstrap CSS for styling -->
8
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/css/bootstrap.min.css">
9
+ <!-- Include DataTables CSS -->
10
+ <link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/dataTables.bootstrap5.min.css">
11
+ <!-- Custom CSS for additional styling -->
12
+ <style>
13
+ body {
14
+ background-color: #f8f9fa;
15
+ font-family: 'Arial', sans-serif;
16
+ }
17
+ .container {
18
+ max-width: 1200px; /* Limit the width of the container */
19
+ margin: auto; /* Center the container */
20
+ padding: 20px; /* Add some padding */
21
+ background: #fff;
22
+ border-radius: 8px;
23
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
24
+ }
25
+ h1 {
26
+ color: #333;
27
+ text-align: center;
28
+ }
29
+ h2 {
30
+ color: #333;
31
+ margin-top: 30px;
32
+ text-align: center;
33
+ }
34
+ .table-responsive {
35
+ margin-top: 20px;
36
+ }
37
+ table {
38
+ border-collapse: separate;
39
+ border-spacing: 0;
40
+ font-size: 14px; /* Reduce the font size */
41
+ width: 100%;
42
+ border: none; /* Remove any default border */
43
+ }
44
+ table thead th {
45
+ background-color: #610b5d;
46
+ color: white;
47
+ border: 1px solid #dee2e6;
48
+ text-align: left;
49
+ }
50
+ table tbody tr {
51
+ background-color: #fff;
52
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
53
+ }
54
+ table tbody tr:hover {
55
+ background-color: #f1f1f1;
56
+ }
57
+ table td, table th {
58
+ padding: 10px; /* Reduce padding */
59
+ border: 1px solid #dee2e6;
60
+ }
61
+ table th:first-child {
62
+ border-top-left-radius: 10px;
63
+ }
64
+ table th:last-child {
65
+ border-top-right-radius: 10px;
66
+ }
67
+ .section{
68
+ padding-top: 19px;
69
+ text-align: left;
70
+ }
71
+
72
+ .section p {
73
+ padding-left: 150px;
74
+ padding-right: 150px;
75
+ text-indent: 2em;
76
+ margin: auto;
77
+ margin-bottom: 10px;
78
+ text-align: left;
79
+ }
80
+
81
+ .section ol, ul {
82
+ padding-left: 150px;
83
+ padding-right: 150px;
84
+ margin: auto;
85
+ margin-bottom: 20px;
86
+ margin-left: 50px;
87
+ text-align: left;
88
+ margin-top: 0px;
89
+ }
90
+
91
+ .citation-section {
92
+ width: 100%;
93
+ margin-top: 50px;
94
+ text-align: center;
95
+ }
96
+ .citation-box {
97
+ background-color: #f8f9fa;
98
+ border: 1px solid #dee2e6;
99
+ border-radius: 8px;
100
+ padding: 10px;
101
+ margin-top: 5px;
102
+ font-size: 15px;
103
+ text-align: left;
104
+ font-family: 'Courier New', Courier, monospace;
105
+ white-space: pre;
106
+ }
107
+
108
+ .image-container-structure {
109
+ display: flex;
110
+ justify-content: center;
111
+ gap: 10px;
112
+ margin-bottom: 40px;
113
+ max-width: 70%; /* Adjust the width as needed */
114
+ margin: auto;
115
+ }
116
+
117
+ .image-container-structure a {
118
+ flex: 1;
119
+ }
120
+
121
+ .image-container-structure img {
122
+ max-width: 100%;
123
+ height: auto;
124
+ display: block;
125
+ margin: auto;
126
+ }
127
+
128
+ .image-container {
129
+ width: 100%;
130
+ margin-bottom: 40px;
131
+ }
132
+ .image-container #admin-questionnaire {
133
+ width: 50%;
134
+ height: auto;
135
+ display: block;
136
+ margin: auto;
137
+ }
138
+ .image-container #ro-image {
139
+ width: 70%;
140
+ height: auto;
141
+ display: block;
142
+ margin: auto;
143
+ }
144
+
145
+ .section-title {
146
+ font-size: 24px;
147
+ font-weight: bold;
148
+ text-align: center;
149
+ margin-bottom: 40px;
150
+ padding: 20px; /* Add padding for more margin around text */
151
+ background-color: #610b5d;
152
+ color: #fff; /* Ensure text is readable on dark background */
153
+ border-radius: 15px; /* Rounded edges */
154
+ }
155
+ .back-button {
156
+ text-align: center;
157
+ margin-top: 50px;
158
+ }
159
+ .custom-button {
160
+ background-color: #610b5d;
161
+ color: #fff; /* Set white text color */
162
+ border-radius: 15px; /* Rounded edges */
163
+ padding: 10px 20px; /* Padding for the button */
164
+ font-size: 18px; /* Increase font size */
165
+ text-decoration: none; /* Remove underline */
166
+ }
167
+ .custom-button:hover {
168
+ background-color: #812b7d;
169
+ color: #fff;
170
+ }
171
+ </style>
172
+ </head>
173
+ <body>
174
+ <div class="container">
175
+ <h1 class="mt-5">Stick To Your Role! Leaderboard</h1>
176
+ <div class="table-responsive">
177
+ <!-- Render the table HTML here -->
178
+ {{ table_html|safe }}
179
+ </div>
180
+ <div class="section">
181
+ <div class="section-title">Thank you for submitting your model!</div>
182
+ <p>
183
+ We will get back to you to confirm the reception of the model.
184
+ If we do not get back to you within two weeks, please contact us at:
185
+ <a href="mailto:grgur.kovac@inria.fr">grgur.kovac@inria.fr</a>.
186
+ </p>
187
+ <div class="back-button">
188
+ <a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
189
+ </div>
190
+ </div>
191
+ </div>
192
+ </div>
193
+
194
+ <!-- Include jQuery -->
195
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
196
+ <!-- Include Bootstrap JS -->
197
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
198
+ <!-- Include DataTables JS -->
199
+ <script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
200
+ <script src="https://cdn.datatables.net/1.11.5/js/dataTables.bootstrap5.min.js"></script>
201
+ <!-- Initialize DataTables -->
202
+ <script>
203
+ $(document).ready(function() {
204
+ const table = $('table').DataTable({
205
+ "paging": false,
206
+ "info": false,
207
+ "columnDefs": [
208
+ { "orderable": false, "targets": 0 },
209
+ { "searchable": false, "targets": 0 }
210
+ ],
211
+ "order": [[ 2, 'desc' ]],
212
+ "drawCallback": function(settings) {
213
+ var api = this.api();
214
+ api.column(0, {order:'applied'}).nodes().each(function(cell, i) {
215
+ cell.innerHTML = i + 1;
216
+ });
217
+ }
218
+ });
219
+ });
220
+
221
+ </script>
222
+ </body>
223
+ </html>
templates/new_model.html ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Stick To Your Role! About</title>
7
+ <!-- Include Bootstrap CSS for styling -->
8
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/css/bootstrap.min.css">
9
+ <!-- Include DataTables CSS -->
10
+ <link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/dataTables.bootstrap5.min.css">
11
+ <!-- Custom CSS for additional styling -->
12
+ <style>
13
+ body {
14
+ background-color: #f8f9fa;
15
+ font-family: 'Arial', sans-serif;
16
+ }
17
+ .container {
18
+ max-width: 1200px; /* Limit the width of the container */
19
+ margin: auto; /* Center the container */
20
+ padding: 20px; /* Add some padding */
21
+ background: #fff;
22
+ border-radius: 8px;
23
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
24
+ }
25
+ h1 {
26
+ color: #333;
27
+ text-align: center;
28
+ }
29
+ h2 {
30
+ color: #333;
31
+ margin-top: 30px;
32
+ text-align: center;
33
+ }
34
+
35
+ .section {
36
+ padding-top: 19px;
37
+ text-align: left;
38
+ }
39
+
40
+ .section p {
41
+ padding-left: 150px;
42
+ padding-right: 150px;
43
+ text-indent: 2em;
44
+ margin: auto;
45
+ margin-bottom: 10px;
46
+ text-align: left;
47
+ }
48
+
49
+ .section ol, ul {
50
+ padding-left: 150px;
51
+ padding-right: 150px;
52
+ margin: auto;
53
+ margin-bottom: 20px;
54
+ margin-left: 50px;
55
+ text-align: left;
56
+ margin-top: 0px;
57
+ }
58
+
59
+ .citation-section {
60
+ width: 100%;
61
+ margin-top: 50px;
62
+ text-align: center;
63
+ }
64
+ .citation-box {
65
+ background-color: #f8f9fa;
66
+ border: 1px solid #dee2e6;
67
+ border-radius: 8px;
68
+ padding: 10px;
69
+ margin-top: 5px;
70
+ font-size: 15px;
71
+ text-align: left;
72
+ font-family: 'Courier New', Courier, monospace;
73
+ white-space: pre;
74
+ }
75
+
76
+ .image-container-structure {
77
+ display: flex;
78
+ justify-content: center;
79
+ gap: 10px;
80
+ margin-bottom: 40px;
81
+ max-width: 70%; /* Adjust the width as needed */
82
+ margin: auto;
83
+ }
84
+
85
+ .image-container-structure a {
86
+ flex: 1;
87
+ }
88
+
89
+ .image-container-structure img {
90
+ max-width: 100%;
91
+ height: auto;
92
+ display: block;
93
+ margin: auto;
94
+ }
95
+
96
+ .image-container {
97
+ width: 100%;
98
+ margin-bottom: 40px;
99
+ }
100
+ .image-container #admin-questionnaire {
101
+ width: 50%;
102
+ height: auto;
103
+ display: block;
104
+ margin: auto;
105
+ }
106
+ .image-container #ro-image {
107
+ width: 70%;
108
+ height: auto;
109
+ display: block;
110
+ margin: auto;
111
+ }
112
+
113
+ .section-title {
114
+ font-size: 24px;
115
+ font-weight: bold;
116
+ text-align: center;
117
+ margin-bottom: 40px;
118
+ padding: 20px; /* Add padding for more margin around text */
119
+ background-color: #610b5d;
120
+ color: #fff; /* Ensure text is readable on dark background */
121
+ border-radius: 15px; /* Rounded edges */
122
+ }
123
+ .back-button {
124
+ text-align: center;
125
+ margin-top: 50px;
126
+ }
127
+ .custom-button {
128
+ background-color: #610b5d;
129
+ color: #fff; /* Set white text color */
130
+ border-radius: 15px; /* Rounded edges */
131
+ padding: 10px 20px; /* Padding for the button */
132
+ font-size: 18px; /* Increase font size */
133
+ text-decoration: none; /* Remove underline */
134
+ }
135
+ .custom-button:hover {
136
+ background-color: #812b7d;
137
+ color: #fff;
138
+ }
139
+ .form-container { /* Grey panel that wraps the model submission form */
140
+ max-width: 80%; /* Adjust as needed */
141
+ margin: 20px 100px; /* 20px top/bottom, fixed 100px left/right (fixed offsets, not auto-centering) */
142
+ padding: 50px 150px;
143
+ text-align: center;
144
+ background-color: #f8f9fa;
145
+ }
146
+
147
+ .form-row {
148
+ max-width: 100%;
149
+ margin-bottom: 20px;
150
+ text-align: left;
151
+ }
152
+
153
+ .form-label {
154
+ }
155
+
156
+ .col-md-4 {
157
+ width: 100%
158
+ }
159
+ .col-md-8 {
160
+ width: 100%
161
+ }
162
+
163
+ .form-content {
164
+ margin-bottom: 15px;
165
+ min-width: 100%;
166
+ }
167
+ .form-content::placeholder {
168
+ color: #aaa;
169
+ font-style: italic;
170
+ }
171
+
172
+ .file-input {
173
+ margin-top: 10px;
174
+ }
175
+ </style>
176
+ </head>
177
+ <body>
178
+ <div class="container">
179
+ <h1 class="mt-5">Stick To Your Role! Leaderboard</h1>
180
+ <div class="table-responsive">
181
+ <!-- Render the table HTML here -->
182
+ {{ table_html|safe }}
183
+ </div>
184
+ <div class="section">
185
+ <div id="evaluate_custom_model" class="section-title">Evaluate a custom model</div>
186
+ <p>
187
+ To evaluate a custom model you can use our <a href="https://gitlab.inria.fr/gkovac/value_stability">open-source code</a>.
188
+ If a model is in the huggingface transformers format (saved either locally or on the hub),
189
+ it can be simply added by adding a config file.
190
+ The model can then be evaluated as any other model.
191
+ To do so, follow the <a href="https://gitlab.inria.fr/gkovac/value_stability/-/blob/master/README.md?ref_type=heads#adding-a-new-model">instructions</a> in the README.md file.
192
+ </p>
193
+ </div>
194
+ <div class="section" id="paper">
195
+ <div class="section-title">Submit a custom model to the Stick To Your Role! Leaderboard</div>
196
+ <p>
197
+ If you want, your model can be added to the Stick To Your Role! Leaderboard, as an unofficial submission.
198
+ A separate list of models containing both official and unofficial submissions will be created.
199
+ The procedure is as follows:
200
+ </p>
201
+ <ol>
202
+ <li>
203
+ <b> Add and evaluate your model </b> - Add your model as a config file as described <a href="{{ url_for('new_model', _anchor='evaluate_custom_model') }}">above</a>.
204
+ This procedure should result in 9 json files as such:
205
+ <code>`Leaderboard/results/stability_leaderboard/&lt;your_model_name&gt;/chunk_0_&lt;timestamp&gt;/results.json`</code>
206
+ </li>
207
+ <li>
208
+ <b> Submit the config file </b> - Create a pull request to our <a href="https://gitlab.inria.fr/gkovac/value_stability">repository</a> from a branch <code>"unofficial_model/&lt;your_model_name&gt;"</code>.
209
+ The pull request should ideally only add the config file in <code>`./models/leaderboard_configs`</code>.
210
+ If additional changes are needed, they should ideally be constrained to a new model class (see <a href="https://gitlab.inria.fr/gkovac/value_stability/-/blob/master/models/huggingfacemodel.py?ref_type=heads">huggingfacemodel.py</a> for reference).</li>
211
+ <li>
212
+ <b> Submit the model results </b> - submit the *.json files as a ZIP using the form below.
213
+ We will integrate the model's results on our side, and rerank models with yours included.
214
+ </li>
215
+ </ol>
216
+ <div class="form-container">
217
+ <form id="model-submission-form" method="POST" action="{{ url_for('submit_model') }}" enctype="multipart/form-data">
218
+ <div class="form-row row">
219
+ <div class="col-md-4">
220
+ <label for="model_name" class="form-label">Model Name:</label>
221
+ </div>
222
+ <div class="col-md-8">
223
+ <input type="text" class="form-content" id="model_name" name="model_name" required>
224
+ </div>
225
+ </div>
226
+ <div class="form-row row">
227
+ <div class="col-md-4">
228
+ <label for="pull_request_link" class="form-label">Pull Request Link:</label>
229
+ </div>
230
+ <div class="col-md-8">
231
+ <input type="url" class="form-content" id="pull_request_link" name="pull_request_link" required>
232
+ </div>
233
+ </div>
234
+ <div class="form-row row">
235
+ <div class="col-md-4">
236
+ <label for="email" class="form-label">Email:</label>
237
+ </div>
238
+ <div class="col-md-8">
239
+ <input type="email" class="form-content" id="email" name="email" required>
240
+ </div>
241
+ </div>
242
+ <div class="form-row row">
243
+ <div class="col-md-4">
244
+ <label for="description" class="form-label">Description:</label>
245
+ </div>
246
+ <div class="col-md-8">
247
+ <textarea class="form-content" id="description" name="description" placeholder="Various details on the model training and architecture (e.g. dataset, model size, optimizer, etc.)" rows="3" required></textarea>
248
+ </div>
249
+ </div>
250
+ <div class="form-row row">
251
+ <div class="col-md-4">
252
+ <label for="model_files" class="form-label">
253
+ Upload the Model results directory as a ZIP file
254
+ (<code>Leaderboard/results/stability_leaderboard/&lt;your_model_name&gt;</code>):
255
+ </label>
256
+ </div>
257
+ <div class="col-md-8">
258
+ <input type="file" id="model_files" name="model_files" class="file-input" accept=".zip" required>
259
+ <small class="form-text text-muted">
260
+ Please upload a ZIP file containing the results directory.
261
+ </small>
262
+ </div>
263
+ </div>
264
+ <button type="submit" class="btn custom-button mt-3">Submit</button>
265
+ </form>
266
+ </div>
267
+ </div>
268
+ <div class="back-button">
269
+ <a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
270
+ </div>
271
+ <div class="citation-section">
272
+ <p>If you found this project useful, please cite our related paper:</p>
273
+ <div class="citation-box" id="citation-text">
274
+ @article{kovavc2024stick,
275
+ title={Stick to your Role! Stability of Personal Values Expressed in Large Language Models},
276
+ author={Kova{\v{c}}, Grgur and Portelas, R{\'e}my and Sawayama, Masataka and Dominey, Peter Ford and Oudeyer, Pierre-Yves},
277
+ journal={arXiv preprint arXiv:2402.14846},
278
+ year={2024}
279
+ }
280
+ </div>
281
+ </div>
282
+ </div>
283
+
284
+ <!-- Include jQuery -->
285
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
286
+ <!-- Include Bootstrap JS -->
287
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
288
+ <!-- Include DataTables JS -->
289
+ <script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
290
+ <script src="https://cdn.datatables.net/1.11.5/js/dataTables.bootstrap5.min.js"></script>
291
+ <!-- Initialize DataTables -->
292
+ <script>
293
+ $(document).ready(function() { // Run once the DOM is ready: set up DataTables on the page's tables.
294
+ const table = $('table').DataTable({ // Applies to every <table> element; the returned handle is not used afterwards.
295
+ "paging": false, // Show all rows on one page (no pagination controls).
296
+ "info": false, // Hide the "Showing X of Y entries" summary.
297
+ "columnDefs": [
298
+ { "orderable": false, "targets": 0 }, // Column 0 cannot be used as a sort key...
299
+ { "searchable": false, "targets": 0 } // ...and is excluded from the search filter.
300
+ ],
301
+ "order": [[ 2, 'desc' ]], // Initial sort: column index 2 (third column), descending.
302
+ "drawCallback": function(settings) { // Invoked by DataTables on every (re)draw, e.g. after sorting or filtering.
303
+ var api = this.api();
304
+ api.column(0, {order:'applied'}).nodes().each(function(cell, i) { // Walk column 0 cells in the currently applied row order.
305
+ cell.innerHTML = i + 1; // Overwrite the cell with its 1-based position, so column 0 always reads 1, 2, 3, ...
306
+ });
307
+ }
308
+ });
309
+ });
310
+ </script>
311
+ </body>
312
+ </html>