Suqi Sun committed on
Commit
911c9b4
1 Parent(s): e07beed

Upload more metrics and fix some issues in app.py

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. app.py +20 -17
  2. eval-results/medqa/0/ckpt_003/medqa_4options.jsonl.tar.gz +0 -0
  3. eval-results/medqa/0/ckpt_003/results.json.tar.gz +0 -0
  4. eval-results/medqa/0/ckpt_006/medqa_4options.jsonl.tar.gz +0 -0
  5. eval-results/medqa/0/ckpt_006/results.json.tar.gz +0 -0
  6. eval-results/medqa/0/ckpt_009/medqa_4options.jsonl.tar.gz +0 -0
  7. eval-results/medqa/0/ckpt_009/results.json.tar.gz +0 -0
  8. eval-results/medqa/0/ckpt_012/medqa_4options.jsonl.tar.gz +0 -0
  9. eval-results/medqa/0/ckpt_012/results.json.tar.gz +0 -0
  10. eval-results/medqa/0/ckpt_015/medqa_4options.jsonl.tar.gz +0 -0
  11. eval-results/medqa/0/ckpt_015/results.json.tar.gz +0 -0
  12. eval-results/medqa/0/ckpt_018/medqa_4options.jsonl.tar.gz +0 -0
  13. eval-results/medqa/0/ckpt_018/results.json.tar.gz +0 -0
  14. eval-results/medqa/0/ckpt_021/medqa_4options.jsonl.tar.gz +0 -0
  15. eval-results/medqa/0/ckpt_021/results.json.tar.gz +0 -0
  16. eval-results/medqa/0/ckpt_024/medqa_4options.jsonl.tar.gz +0 -0
  17. eval-results/medqa/0/ckpt_024/results.json.tar.gz +0 -0
  18. eval-results/medqa/0/ckpt_027/medqa_4options.jsonl.tar.gz +0 -0
  19. eval-results/medqa/0/ckpt_027/results.json.tar.gz +0 -0
  20. eval-results/medqa/0/ckpt_030/medqa_4options.jsonl.tar.gz +0 -0
  21. eval-results/medqa/0/ckpt_030/results.json.tar.gz +0 -0
  22. eval-results/medqa/0/ckpt_033/medqa_4options.jsonl.tar.gz +0 -0
  23. eval-results/medqa/0/ckpt_033/results.json.tar.gz +0 -0
  24. eval-results/medqa/0/ckpt_036/medqa_4options.jsonl.tar.gz +0 -0
  25. eval-results/medqa/0/ckpt_036/results.json.tar.gz +0 -0
  26. eval-results/medqa/0/ckpt_039/medqa_4options.jsonl.tar.gz +0 -0
  27. eval-results/medqa/0/ckpt_039/results.json.tar.gz +0 -0
  28. eval-results/medqa/0/ckpt_042/medqa_4options.jsonl.tar.gz +0 -0
  29. eval-results/medqa/0/ckpt_042/results.json.tar.gz +0 -0
  30. eval-results/medqa/0/ckpt_045/medqa_4options.jsonl.tar.gz +0 -0
  31. eval-results/medqa/0/ckpt_045/results.json.tar.gz +0 -0
  32. eval-results/medqa/0/ckpt_048/medqa_4options.jsonl.tar.gz +0 -0
  33. eval-results/medqa/0/ckpt_048/results.json.tar.gz +0 -0
  34. eval-results/medqa/0/ckpt_051/medqa_4options.jsonl.tar.gz +0 -0
  35. eval-results/medqa/0/ckpt_051/results.json.tar.gz +0 -0
  36. eval-results/medqa/0/ckpt_054/medqa_4options.jsonl.tar.gz +0 -0
  37. eval-results/medqa/0/ckpt_054/results.json.tar.gz +0 -0
  38. eval-results/medqa/0/ckpt_057/medqa_4options.jsonl.tar.gz +0 -0
  39. eval-results/medqa/0/ckpt_057/results.json.tar.gz +0 -0
  40. eval-results/medqa/0/ckpt_060/medqa_4options.jsonl.tar.gz +0 -0
  41. eval-results/medqa/0/ckpt_060/results.json.tar.gz +0 -0
  42. eval-results/medqa/0/ckpt_063/medqa_4options.jsonl.tar.gz +0 -0
  43. eval-results/medqa/0/ckpt_063/results.json.tar.gz +0 -0
  44. eval-results/medqa/0/ckpt_066/medqa_4options.jsonl.tar.gz +0 -0
  45. eval-results/medqa/0/ckpt_066/results.json.tar.gz +0 -0
  46. eval-results/medqa/0/ckpt_069/medqa_4options.jsonl.tar.gz +0 -0
  47. eval-results/medqa/0/ckpt_069/results.json.tar.gz +0 -0
  48. eval-results/medqa/0/ckpt_072/medqa_4options.jsonl.tar.gz +0 -0
  49. eval-results/medqa/0/ckpt_072/results.json.tar.gz +0 -0
  50. eval-results/medqa/0/ckpt_075/medqa_4options.jsonl.tar.gz +0 -0
app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  import streamlit as st
3
  import json
4
  import tarfile
5
- from base64 import b64encode
6
 
7
  st.set_page_config(layout="wide")
8
 
@@ -13,10 +12,8 @@ st.markdown("""The K2 gallery allows one to browse the output of various evaluat
13
 
14
 
15
  with st.sidebar:
16
- with open(os.path.join(PARENT_DIR, "k2-logo.svg"), 'r') as f:
17
- b64 = b64encode(f.read().encode('utf-8')).decode("utf-8")
18
- html = f"<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='data:image/svg+xml;base64,{b64}' width='100' />"
19
- st.markdown(html, unsafe_allow_html=True)
20
 
21
  metric = st.radio(
22
  "Choose a metric", options=os.listdir(os.path.join(EVAL_DIR))
@@ -28,18 +25,24 @@ with st.sidebar:
28
 
29
  col1, col2 = st.columns(2)
30
 
31
- with col1:
32
- st.header("Checkpoint A")
33
- ckpt = st.selectbox('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key="A1")
34
  st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
35
- file = st.selectbox("Select a file", sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key="A2")
36
- with tarfile.open(os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "r:gz") as f:
37
- st.json(json.load(f.extractfile(f.next())))
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  with col2:
40
- st.header("Checkpoint B")
41
- ckpt = st.selectbox('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key="B1")
42
- st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
43
- file = st.selectbox("Select a file", sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key="B2")
44
- with tarfile.open(os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "r:gz") as f:
45
- st.json(json.load(f.extractfile(f.next())))
 
2
  import streamlit as st
3
  import json
4
  import tarfile
 
5
 
6
  st.set_page_config(layout="wide")
7
 
 
12
 
13
 
14
  with st.sidebar:
15
+ html = f"<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='https://huggingface.co/spaces/LLM360/k2-eval-gallery/raw/main/k2-logo.svg' width='100' />"
16
+ st.markdown(html, unsafe_allow_html=True)
 
 
17
 
18
  metric = st.radio(
19
  "Choose a metric", options=os.listdir(os.path.join(EVAL_DIR))
 
25
 
26
  col1, col2 = st.columns(2)
27
 
28
+ def render_column(col_label):
29
+ st.header(f"Checkpoint {col_label}")
30
+ ckpt = st.select_slider('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key=col_label + '1')
31
  st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
32
+ file = st.selectbox("Select a file", sorted(f_name[:-len(".tar.gz")] for f_name in os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key=col_label + '2')
33
+ file += ".tar.gz"
34
+ with tarfile.open(os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "r:gz") as tar:
35
+ f = tar.extractfile(tar.next())
36
+ eval_json = json.load(f)
37
+ if isinstance(eval_json, list):
38
+ doc_id = st.slider("Select a document id", 0, len(eval_json) - 1, 0, 1, key=col_label + '3')
39
+ st.json(eval_json[doc_id])
40
+ else:
41
+ st.json(eval_json)
42
+ f.close()
43
+
44
+ with col1:
45
+ render_column('A')
46
 
47
  with col2:
48
+ render_column('B')
 
 
 
 
 
eval-results/medqa/0/ckpt_003/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_003/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_006/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_006/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_009/medqa_4options.jsonl.tar.gz ADDED
Binary file (690 kB). View file
 
eval-results/medqa/0/ckpt_009/results.json.tar.gz ADDED
Binary file (2.84 kB). View file
 
eval-results/medqa/0/ckpt_012/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_012/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_015/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_015/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_018/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_018/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_021/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_021/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_024/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_024/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_027/medqa_4options.jsonl.tar.gz ADDED
Binary file (690 kB). View file
 
eval-results/medqa/0/ckpt_027/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_030/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_030/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_033/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_033/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_036/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_036/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_039/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_039/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_042/medqa_4options.jsonl.tar.gz ADDED
Binary file (691 kB). View file
 
eval-results/medqa/0/ckpt_042/results.json.tar.gz ADDED
Binary file (2.82 kB). View file
 
eval-results/medqa/0/ckpt_045/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_045/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_048/medqa_4options.jsonl.tar.gz ADDED
Binary file (690 kB). View file
 
eval-results/medqa/0/ckpt_048/results.json.tar.gz ADDED
Binary file (2.82 kB). View file
 
eval-results/medqa/0/ckpt_051/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_051/results.json.tar.gz ADDED
Binary file (2.82 kB). View file
 
eval-results/medqa/0/ckpt_054/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_054/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_057/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_057/results.json.tar.gz ADDED
Binary file (2.84 kB). View file
 
eval-results/medqa/0/ckpt_060/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_060/results.json.tar.gz ADDED
Binary file (2.84 kB). View file
 
eval-results/medqa/0/ckpt_063/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_063/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_066/medqa_4options.jsonl.tar.gz ADDED
Binary file (693 kB). View file
 
eval-results/medqa/0/ckpt_066/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_069/medqa_4options.jsonl.tar.gz ADDED
Binary file (693 kB). View file
 
eval-results/medqa/0/ckpt_069/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_072/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file
 
eval-results/medqa/0/ckpt_072/results.json.tar.gz ADDED
Binary file (2.81 kB). View file
 
eval-results/medqa/0/ckpt_075/medqa_4options.jsonl.tar.gz ADDED
Binary file (692 kB). View file