rowankwang committed on
Commit
3e8cd27
1 Parent(s): 1c82c33

final data

Browse files
Files changed (2) hide show
  1. app.py +23 -28
  2. grid_eval_gpt4o.json +0 -0
app.py CHANGED
@@ -18,15 +18,15 @@ st.set_page_config(layout="wide")
18
  # config['preauthorized']
19
  # )
20
 
21
- file_path = 'synth_toy_eval.json'
22
 
23
  # Load your data
24
  @st.cache_data()
25
  def load_data():
26
  with open(file_path, 'r') as file:
27
  data = json.load(file)
28
- random.shuffle(data)
29
- data = data[:10]
30
  return data
31
 
32
  def save_data(data):
@@ -36,12 +36,13 @@ def save_data(data):
36
 
37
  def download_json(data):
38
  return json.dumps(data, indent=4)
 
39
  data = load_data()
40
 
41
  for query in data:
42
  for result in query['results']:
43
- if 'verified' not in result:
44
- result['verified'] = False
45
 
46
  # State management for current query index
47
  if 'current_query_index' not in st.session_state:
@@ -112,8 +113,18 @@ def display_query():
112
  mime="application/json"
113
  )
114
 
115
- st.markdown(f"<p>At index {st.session_state.current_query_index + 1}. Graded Queries: {st.session_state.graded_queries}/{len(st.session_state.data)}</p>", unsafe_allow_html=True)
116
-
 
 
 
 
 
 
 
 
 
 
117
  if st.session_state.graded_queries >= len(data):
118
  save_data(st.session_state.data)
119
  st.success(f"{len(data)} Queries graded and data saved!")
@@ -123,36 +134,22 @@ def display_query():
123
  st.header(f"Query: {current_query['query']}")
124
  status_color = 'green' if current_query.get('status', None) is not None else 'red'
125
  st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
126
-
127
  st.subheader("Results:")
128
  for index, result in enumerate(current_query['results']):
129
  st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
130
  col1, col2 = st.columns([3, 2], gap="small")
131
- with col1:
132
- # title_style = f"color: {'green' if result.get('verified') is True else 'red' if result.get('verified') is False else 'white'};"
133
-
134
  st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
135
  st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']}) | {result['published_date']}", unsafe_allow_html=True)
136
  st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
137
  with col2:
138
  grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
139
- st.markdown(f"<b style='color: {grade_color};'>Model Grade: {result['grade']}</b>", unsafe_allow_html=True)
140
  st.write(result['model_trace'])
141
 
142
- if st.checkbox("Accept", value=result.get('verified'), key=f'verify-{index}'):
143
- result['verified'] = True
144
-
145
- # btn_cols = st.columns([1, 1])
146
- # with btn_cols[0]:
147
- # if st.button('Accept', key=f'accept-{index}'):
148
- # result['verified'] = True
149
- # if result.get('verified') is True:
150
- # st.write('Accepted')
151
- # with btn_cols[1]:
152
- # if st.button('Reject', key=f'reject-{index}'):
153
- # result['verified'] = False
154
- # if result.get('verified') is False:
155
- # st.write('Rejected')
156
 
157
  st.markdown("</div>", unsafe_allow_html=True)
158
  st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
@@ -160,8 +157,6 @@ def display_query():
160
  # Show current query and its results
161
  current_query = st.session_state.data[st.session_state.current_query_index]
162
 
163
-
164
-
165
  display_query()
166
 
167
  col1, col2 = st.columns([5, 1], gap="small")
 
18
  # config['preauthorized']
19
  # )
20
 
21
+ file_path = 'grid_eval_gpt4o.json'
22
 
23
  # Load your data
24
  @st.cache_data()
25
  def load_data():
26
  with open(file_path, 'r') as file:
27
  data = json.load(file)
28
+ # random.shuffle(data)
29
+ # data = data[]
30
  return data
31
 
32
  def save_data(data):
 
36
 
37
  def download_json(data):
38
  return json.dumps(data, indent=4)
39
+
40
  data = load_data()
41
 
42
  for query in data:
43
  for result in query['results']:
44
+ if 'agree' not in result:
45
+ result['agree'] = True
46
 
47
  # State management for current query index
48
  if 'current_query_index' not in st.session_state:
 
113
  mime="application/json"
114
  )
115
 
116
+ index = st.text_input(f"At index {st.session_state.current_query_index + 1}. Graded: {st.session_state.graded_queries}/{len(st.session_state.data)}", placeholder="Go to index:")
117
+ if index:
118
+ try:
119
+ index = int(index) - 1
120
+ if index < 0 or index >= len(data):
121
+ st.error("Invalid index.")
122
+ else:
123
+ st.session_state.current_query_index = index
124
+ st.rerun()
125
+ except ValueError:
126
+ st.error("Please enter a valid integer.")
127
+
128
  if st.session_state.graded_queries >= len(data):
129
  save_data(st.session_state.data)
130
  st.success(f"{len(data)} Queries graded and data saved!")
 
134
  st.header(f"Query: {current_query['query']}")
135
  status_color = 'green' if current_query.get('status', None) is not None else 'red'
136
  st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
137
+ st.markdown(f"Model's Query Gen Reasoning Trace: {current_query['reasoning_trace'][0]}")
138
  st.subheader("Results:")
139
  for index, result in enumerate(current_query['results']):
140
  st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
141
  col1, col2 = st.columns([3, 2], gap="small")
142
+ with col1:
 
 
143
  st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
144
  st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']}) | {result['published_date']}", unsafe_allow_html=True)
145
  st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
146
  with col2:
147
  grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
148
+ st.markdown(f"Model Grade: <b style='color: {grade_color};'>{result['grade']}</b>", unsafe_allow_html=True)
149
  st.write(result['model_trace'])
150
 
151
+ if st.checkbox("Reject", value= not result.get('agree'), key=f'verify-{index}'):
152
+ result['agree'] = False
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  st.markdown("</div>", unsafe_allow_html=True)
155
  st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
 
157
  # Show current query and its results
158
  current_query = st.session_state.data[st.session_state.current_query_index]
159
 
 
 
160
  display_query()
161
 
162
  col1, col2 = st.columns([5, 1], gap="small")
grid_eval_gpt4o.json ADDED
The diff for this file is too large to render. See raw diff