Spaces:
Running
Running
Suqi Sun
committed on
Commit
•
911c9b4
1
Parent(s):
e07beed
Upload more metrics and fix some issues in app.py
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +20 -17
- eval-results/medqa/0/ckpt_003/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_003/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_006/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_006/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_009/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_009/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_012/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_012/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_015/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_015/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_018/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_018/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_021/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_021/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_024/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_024/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_027/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_027/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_030/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_030/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_033/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_033/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_036/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_036/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_039/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_039/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_042/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_042/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_045/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_045/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_048/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_048/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_051/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_051/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_054/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_054/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_057/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_057/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_060/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_060/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_063/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_063/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_066/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_066/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_069/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_069/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_072/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_072/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_075/medqa_4options.jsonl.tar.gz +0 -0
app.py
CHANGED
@@ -2,7 +2,6 @@ import os
|
|
2 |
import streamlit as st
|
3 |
import json
|
4 |
import tarfile
|
5 |
-
from base64 import b64encode
|
6 |
|
7 |
st.set_page_config(layout="wide")
|
8 |
|
@@ -13,10 +12,8 @@ st.markdown("""The K2 gallery allows one to browse the output of various evaluat
|
|
13 |
|
14 |
|
15 |
with st.sidebar:
|
16 |
-
|
17 |
-
|
18 |
-
html = f"<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='data:image/svg+xml;base64,{b64}' width='100' />"
|
19 |
-
st.markdown(html, unsafe_allow_html=True)
|
20 |
|
21 |
metric = st.radio(
|
22 |
"Choose a metric", options=os.listdir(os.path.join(EVAL_DIR))
|
@@ -28,18 +25,24 @@ with st.sidebar:
|
|
28 |
|
29 |
col1, col2 = st.columns(2)
|
30 |
|
31 |
-
|
32 |
-
st.header("Checkpoint
|
33 |
-
ckpt = st.
|
34 |
st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
|
35 |
-
file = st.selectbox("Select a file", sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key=
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
with col2:
|
40 |
-
|
41 |
-
ckpt = st.selectbox('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key="B1")
|
42 |
-
st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
|
43 |
-
file = st.selectbox("Select a file", sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key="B2")
|
44 |
-
with tarfile.open(os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "r:gz") as f:
|
45 |
-
st.json(json.load(f.extractfile(f.next())))
|
|
|
2 |
import streamlit as st
|
3 |
import json
|
4 |
import tarfile
|
|
|
5 |
|
6 |
st.set_page_config(layout="wide")
|
7 |
|
|
|
12 |
|
13 |
|
14 |
with st.sidebar:
|
15 |
+
html = f"<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='https://huggingface.co/spaces/LLM360/k2-eval-gallery/raw/main/k2-logo.svg' width='100' />"
|
16 |
+
st.markdown(html, unsafe_allow_html=True)
|
|
|
|
|
17 |
|
18 |
metric = st.radio(
|
19 |
"Choose a metric", options=os.listdir(os.path.join(EVAL_DIR))
|
|
|
25 |
|
26 |
col1, col2 = st.columns(2)
|
27 |
|
28 |
+
def render_column(col_label):
|
29 |
+
st.header(f"Checkpoint {col_label}")
|
30 |
+
ckpt = st.select_slider('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key=col_label + '1')
|
31 |
st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
|
32 |
+
file = st.selectbox("Select a file", sorted(f_name[:-len(".tar.gz")] for f_name in os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key=col_label + '2')
|
33 |
+
file += ".tar.gz"
|
34 |
+
with tarfile.open(os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "r:gz") as tar:
|
35 |
+
f = tar.extractfile(tar.next())
|
36 |
+
eval_json = json.load(f)
|
37 |
+
if isinstance(eval_json, list):
|
38 |
+
doc_id = st.slider("Select a document id", 0, len(eval_json) - 1, 0, 1, key=col_label + '3')
|
39 |
+
st.json(eval_json[doc_id])
|
40 |
+
else:
|
41 |
+
st.json(eval_json)
|
42 |
+
f.close()
|
43 |
+
|
44 |
+
with col1:
|
45 |
+
render_column('A')
|
46 |
|
47 |
with col2:
|
48 |
+
render_column('B')
|
|
|
|
|
|
|
|
|
|
eval-results/medqa/0/ckpt_003/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_003/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_006/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_006/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_009/medqa_4options.jsonl.tar.gz
ADDED
Binary file (690 kB). View file
|
|
eval-results/medqa/0/ckpt_009/results.json.tar.gz
ADDED
Binary file (2.84 kB). View file
|
|
eval-results/medqa/0/ckpt_012/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_012/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_015/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_015/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_018/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_018/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_021/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_021/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_024/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_024/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_027/medqa_4options.jsonl.tar.gz
ADDED
Binary file (690 kB). View file
|
|
eval-results/medqa/0/ckpt_027/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_030/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_030/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_033/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_033/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_036/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_036/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_039/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_039/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_042/medqa_4options.jsonl.tar.gz
ADDED
Binary file (691 kB). View file
|
|
eval-results/medqa/0/ckpt_042/results.json.tar.gz
ADDED
Binary file (2.82 kB). View file
|
|
eval-results/medqa/0/ckpt_045/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_045/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_048/medqa_4options.jsonl.tar.gz
ADDED
Binary file (690 kB). View file
|
|
eval-results/medqa/0/ckpt_048/results.json.tar.gz
ADDED
Binary file (2.82 kB). View file
|
|
eval-results/medqa/0/ckpt_051/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_051/results.json.tar.gz
ADDED
Binary file (2.82 kB). View file
|
|
eval-results/medqa/0/ckpt_054/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_054/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_057/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_057/results.json.tar.gz
ADDED
Binary file (2.84 kB). View file
|
|
eval-results/medqa/0/ckpt_060/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_060/results.json.tar.gz
ADDED
Binary file (2.84 kB). View file
|
|
eval-results/medqa/0/ckpt_063/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_063/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_066/medqa_4options.jsonl.tar.gz
ADDED
Binary file (693 kB). View file
|
|
eval-results/medqa/0/ckpt_066/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_069/medqa_4options.jsonl.tar.gz
ADDED
Binary file (693 kB). View file
|
|
eval-results/medqa/0/ckpt_069/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_072/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|
eval-results/medqa/0/ckpt_072/results.json.tar.gz
ADDED
Binary file (2.81 kB). View file
|
|
eval-results/medqa/0/ckpt_075/medqa_4options.jsonl.tar.gz
ADDED
Binary file (692 kB). View file
|
|