Spaces:
Sleeping
Sleeping
rowankwang
committed on
Commit
•
3e8cd27
1
Parent(s):
1c82c33
final data
Browse files
- app.py +23 -28
- grid_eval_gpt4o.json +0 -0
app.py
CHANGED
@@ -18,15 +18,15 @@ st.set_page_config(layout="wide")
|
|
18 |
# config['preauthorized']
|
19 |
# )
|
20 |
|
21 |
-
file_path = '
|
22 |
|
23 |
# Load your data
|
24 |
@st.cache_data()
|
25 |
def load_data():
|
26 |
with open(file_path, 'r') as file:
|
27 |
data = json.load(file)
|
28 |
-
random.shuffle(data)
|
29 |
-
data = data[
|
30 |
return data
|
31 |
|
32 |
def save_data(data):
|
@@ -36,12 +36,13 @@ def save_data(data):
|
|
36 |
|
37 |
def download_json(data):
|
38 |
return json.dumps(data, indent=4)
|
|
|
39 |
data = load_data()
|
40 |
|
41 |
for query in data:
|
42 |
for result in query['results']:
|
43 |
-
if '
|
44 |
-
result['
|
45 |
|
46 |
# State management for current query index
|
47 |
if 'current_query_index' not in st.session_state:
|
@@ -112,8 +113,18 @@ def display_query():
|
|
112 |
mime="application/json"
|
113 |
)
|
114 |
|
115 |
-
st.
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
if st.session_state.graded_queries >= len(data):
|
118 |
save_data(st.session_state.data)
|
119 |
st.success(f"{len(data)} Queries graded and data saved!")
|
@@ -123,36 +134,22 @@ def display_query():
|
|
123 |
st.header(f"Query: {current_query['query']}")
|
124 |
status_color = 'green' if current_query.get('status', None) is not None else 'red'
|
125 |
st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
|
126 |
-
|
127 |
st.subheader("Results:")
|
128 |
for index, result in enumerate(current_query['results']):
|
129 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
130 |
col1, col2 = st.columns([3, 2], gap="small")
|
131 |
-
with col1:
|
132 |
-
# title_style = f"color: {'green' if result.get('verified') is True else 'red' if result.get('verified') is False else 'white'};"
|
133 |
-
|
134 |
st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
|
135 |
st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']}) | {result['published_date']}", unsafe_allow_html=True)
|
136 |
st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
|
137 |
with col2:
|
138 |
grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
|
139 |
-
st.markdown(f"<b style='color: {grade_color};'>
|
140 |
st.write(result['model_trace'])
|
141 |
|
142 |
-
if st.checkbox("
|
143 |
-
result['
|
144 |
-
|
145 |
-
# btn_cols = st.columns([1, 1])
|
146 |
-
# with btn_cols[0]:
|
147 |
-
# if st.button('Accept', key=f'accept-{index}'):
|
148 |
-
# result['verified'] = True
|
149 |
-
# if result.get('verified') is True:
|
150 |
-
# st.write('Accepted')
|
151 |
-
# with btn_cols[1]:
|
152 |
-
# if st.button('Reject', key=f'reject-{index}'):
|
153 |
-
# result['verified'] = False
|
154 |
-
# if result.get('verified') is False:
|
155 |
-
# st.write('Rejected')
|
156 |
|
157 |
st.markdown("</div>", unsafe_allow_html=True)
|
158 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
@@ -160,8 +157,6 @@ def display_query():
|
|
160 |
# Show current query and its results
|
161 |
current_query = st.session_state.data[st.session_state.current_query_index]
|
162 |
|
163 |
-
|
164 |
-
|
165 |
display_query()
|
166 |
|
167 |
col1, col2 = st.columns([5, 1], gap="small")
|
|
|
18 |
# config['preauthorized']
|
19 |
# )
|
20 |
|
21 |
+
file_path = 'grid_eval_gpt4o.json'
|
22 |
|
23 |
# Load your data
|
24 |
@st.cache_data()
|
25 |
def load_data():
|
26 |
with open(file_path, 'r') as file:
|
27 |
data = json.load(file)
|
28 |
+
# random.shuffle(data)
|
29 |
+
# data = data[]
|
30 |
return data
|
31 |
|
32 |
def save_data(data):
|
|
|
36 |
|
37 |
def download_json(data):
|
38 |
return json.dumps(data, indent=4)
|
39 |
+
|
40 |
data = load_data()
|
41 |
|
42 |
for query in data:
|
43 |
for result in query['results']:
|
44 |
+
if 'agree' not in result:
|
45 |
+
result['agree'] = True
|
46 |
|
47 |
# State management for current query index
|
48 |
if 'current_query_index' not in st.session_state:
|
|
|
113 |
mime="application/json"
|
114 |
)
|
115 |
|
116 |
+
index = st.text_input(f"At index {st.session_state.current_query_index + 1}. Graded: {st.session_state.graded_queries}/{len(st.session_state.data)}", placeholder="Go to index:")
|
117 |
+
if index:
|
118 |
+
try:
|
119 |
+
index = int(index) - 1
|
120 |
+
if index < 0 or index >= len(data):
|
121 |
+
st.error("Invalid index.")
|
122 |
+
else:
|
123 |
+
st.session_state.current_query_index = index
|
124 |
+
st.rerun()
|
125 |
+
except ValueError:
|
126 |
+
st.error("Please enter a valid integer.")
|
127 |
+
|
128 |
if st.session_state.graded_queries >= len(data):
|
129 |
save_data(st.session_state.data)
|
130 |
st.success(f"{len(data)} Queries graded and data saved!")
|
|
|
134 |
st.header(f"Query: {current_query['query']}")
|
135 |
status_color = 'green' if current_query.get('status', None) is not None else 'red'
|
136 |
st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
|
137 |
+
st.markdown(f"Model's Query Gen Reasoning Trace: {current_query['reasoning_trace'][0]}")
|
138 |
st.subheader("Results:")
|
139 |
for index, result in enumerate(current_query['results']):
|
140 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
141 |
col1, col2 = st.columns([3, 2], gap="small")
|
142 |
+
with col1:
|
|
|
|
|
143 |
st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
|
144 |
st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']}) | {result['published_date']}", unsafe_allow_html=True)
|
145 |
st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
|
146 |
with col2:
|
147 |
grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
|
148 |
+
st.markdown(f"Model Grade: <b style='color: {grade_color};'>{result['grade']}</b>", unsafe_allow_html=True)
|
149 |
st.write(result['model_trace'])
|
150 |
|
151 |
+
if st.checkbox("Reject", value= not result.get('agree'), key=f'verify-{index}'):
|
152 |
+
result['agree'] = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
st.markdown("</div>", unsafe_allow_html=True)
|
155 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
|
|
157 |
# Show current query and its results
|
158 |
current_query = st.session_state.data[st.session_state.current_query_index]
|
159 |
|
|
|
|
|
160 |
display_query()
|
161 |
|
162 |
col1, col2 = st.columns([5, 1], gap="small")
|
grid_eval_gpt4o.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|