Spaces:

rowankwang
/

search-res-grader

Sleeping

App Files Files Community

rowankwang committed on Jun 26

Commit

3e8cd27

•

1 Parent(s): 1c82c33

final data

Browse files

Files changed (2) hide show

app.py +23 -28
grid_eval_gpt4o.json +0 -0

app.py CHANGED Viewed

@@ -18,15 +18,15 @@ st.set_page_config(layout="wide")
 #     config['preauthorized']
 # )
-file_path = 'synth_toy_eval.json'
 # Load your data
 @st.cache_data()
 def load_data():
     with open(file_path, 'r') as file:
         data = json.load(file)
-    random.shuffle(data)
-    data = data[:10]
     return data
 def save_data(data):
@@ -36,12 +36,13 @@ def save_data(data):
 def download_json(data):
     return json.dumps(data, indent=4)
 data = load_data()
 for query in data:
     for result in query['results']:
-        if 'verified' not in result:
-            result['verified'] = False
 # State management for current query index
 if 'current_query_index' not in st.session_state:
@@ -112,8 +113,18 @@ def display_query():
                 mime="application/json"
             )
-        st.markdown(f"<p>At index {st.session_state.current_query_index + 1}. Graded Queries: {st.session_state.graded_queries}/{len(st.session_state.data)}</p>", unsafe_allow_html=True)
     if st.session_state.graded_queries >= len(data):
         save_data(st.session_state.data)
         st.success(f"{len(data)} Queries graded and data saved!")
@@ -123,36 +134,22 @@ def display_query():
     st.header(f"Query: {current_query['query']}")
     status_color = 'green' if current_query.get('status', None) is not None else 'red'
     st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
     st.subheader("Results:")
     for index, result in enumerate(current_query['results']):
         st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
         col1, col2 = st.columns([3, 2], gap="small")
-        with col1:
-            # title_style = f"color: {'green' if result.get('verified') is True else 'red' if result.get('verified') is False else 'white'};"
             st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
             st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']})  |  {result['published_date']}", unsafe_allow_html=True)
             st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
         with col2:
             grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
-            st.markdown(f"<b style='color: {grade_color};'>Model Grade: {result['grade']}</b>", unsafe_allow_html=True)
             st.write(result['model_trace'])
-            if st.checkbox("Accept", value=result.get('verified'), key=f'verify-{index}'):
-                result['verified'] = True
-            # btn_cols = st.columns([1, 1])
-            # with btn_cols[0]:
-            #     if st.button('Accept', key=f'accept-{index}'):
-            #         result['verified'] = True
-            #     if result.get('verified') is True:
-            #         st.write('Accepted')
-            # with btn_cols[1]:
-            #     if st.button('Reject', key=f'reject-{index}'):
-            #         result['verified'] = False
-            #     if result.get('verified') is False:
-            #         st.write('Rejected')
         st.markdown("</div>", unsafe_allow_html=True)
     st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
@@ -160,8 +157,6 @@ def display_query():
 # Show current query and its results
 current_query = st.session_state.data[st.session_state.current_query_index]
 display_query()
 col1, col2 = st.columns([5, 1], gap="small")

 #     config['preauthorized']
 # )
+file_path = 'grid_eval_gpt4o.json'
 # Load your data
 @st.cache_data()
 def load_data():
     with open(file_path, 'r') as file:
         data = json.load(file)
+    # random.shuffle(data)
+    # data = data[]
     return data
 def save_data(data):
 def download_json(data):
     return json.dumps(data, indent=4)
 data = load_data()
 for query in data:
     for result in query['results']:
+        if 'agree' not in result:
+            result['agree'] = True
 # State management for current query index
 if 'current_query_index' not in st.session_state:
                 mime="application/json"
             )
+        index = st.text_input(f"At index {st.session_state.current_query_index + 1}. Graded: {st.session_state.graded_queries}/{len(st.session_state.data)}", placeholder="Go to index:")
+        if index:
+            try:
+                index = int(index) - 1
+                if index < 0 or index >= len(data):
+                    st.error("Invalid index.")
+                else:
+                    st.session_state.current_query_index = index
+                    st.rerun()
+            except ValueError:
+                st.error("Please enter a valid integer.")
     if st.session_state.graded_queries >= len(data):
         save_data(st.session_state.data)
         st.success(f"{len(data)} Queries graded and data saved!")
     st.header(f"Query: {current_query['query']}")
     status_color = 'green' if current_query.get('status', None) is not None else 'red'
     st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
+    st.markdown(f"Model's Query Gen Reasoning Trace: {current_query['reasoning_trace'][0]}")
     st.subheader("Results:")
     for index, result in enumerate(current_query['results']):
         st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
         col1, col2 = st.columns([3, 2], gap="small")
+        with col1:
             st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
             st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']})  |  {result['published_date']}", unsafe_allow_html=True)
             st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
         with col2:
             grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
+            st.markdown(f"Model Grade: <b style='color: {grade_color};'>{result['grade']}</b>", unsafe_allow_html=True)
             st.write(result['model_trace'])
+            if st.checkbox("Reject", value= not result.get('agree'), key=f'verify-{index}'):
+                result['agree'] = False
         st.markdown("</div>", unsafe_allow_html=True)
     st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
 # Show current query and its results
 current_query = st.session_state.data[st.session_state.current_query_index]
 display_query()
 col1, col2 = st.columns([5, 1], gap="small")

grid_eval_gpt4o.json ADDED Viewed

The diff for this file is too large to render. See raw diff