Spaces:

allenai
/

ZebraLogic

Running

yuchenlin committed on Jul 19

Commit

c1a5b93

•

1 Parent(s): 3e5d61f

add truth data viewer

Files changed (3) hide show

app.py CHANGED Viewed

@@ -104,7 +104,8 @@ def sample_explore_item(model_name, size_H, size_W):
     puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
     cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
     model_eval_md = f"### 🆚 Evaluation:\n\n  **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
-    return puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md
 def _tab_explore():
@@ -124,11 +125,11 @@ def _tab_explore():
     puzzle_md = gr.Markdown("### 🦓 Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
     model_reasoning_md = gr.Markdown("### 🤖 Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
     model_prediction_md = gr.Markdown("### 💬 Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
     model_eval_md = gr.Markdown("### 🆚 Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
     explore_button.click(fn=sample_explore_item,
                          inputs=[model_selection, size_H_selection, size_W_selection],
-                         outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md])

     puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
     cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
     model_eval_md = f"### 🆚 Evaluation:\n\n  **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
+    turht_solution_md = f"### ✅ Truth Solution:\n\n{explore_item['truth_solution_table']}"
+    return puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md
 def _tab_explore():
     puzzle_md = gr.Markdown("### 🦓 Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
     model_reasoning_md = gr.Markdown("### 🤖 Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
     model_prediction_md = gr.Markdown("### 💬 Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
+    turht_solution_md = gr.Markdown("### ✅ Truth Solution: \n\nTo be loaded", elem_id="truth-solution-md", elem_classes="box_md")
     model_eval_md = gr.Markdown("### 🆚 Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
     explore_button.click(fn=sample_explore_item,
                          inputs=[model_selection, size_H_selection, size_W_selection],
+                         outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md])

data_utils.py CHANGED Viewed

@@ -92,6 +92,8 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
             continue
         if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
             continue
         prediction_reasoning = prediction_json.get("reasoning", "")
         prediction_table = prediction_json["solution"]
         if prediction_table is not None:
@@ -120,10 +122,11 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
     table_md = tabulate(rows, headers=headers, tablefmt="github")
     explore_item["solution_table_md"] = table_md
-    this_total_cells, this_correct_cells = eval_each_puzzle(explore_item["id"], prediction_table)
     # print(table_md)
     explore_item["correct_cells"] = this_correct_cells
     explore_item["total_cells"] = this_total_cells
     return explore_item

             continue
         if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
             continue
+        if "loves the spaghetti eater" in item["puzzle"].lower():
+            continue
         prediction_reasoning = prediction_json.get("reasoning", "")
         prediction_table = prediction_json["solution"]
         if prediction_table is not None:
     table_md = tabulate(rows, headers=headers, tablefmt="github")
     explore_item["solution_table_md"] = table_md
+    this_total_cells, this_correct_cells, truth_solution_table = eval_each_puzzle(explore_item["id"], prediction_table)
     # print(table_md)
     explore_item["correct_cells"] = this_correct_cells
     explore_item["total_cells"] = this_total_cells
+    explore_item["truth_solution_table"]  = tabulate(truth_solution_table["rows"], headers=truth_solution_table["header"], tablefmt="github")
     return explore_item

eval_utils.py CHANGED Viewed

@@ -83,7 +83,7 @@ def eval_each_puzzle(id, prediction_table):
                     predicted_cell = prediction_table[house][column].lower().strip()
                 if truth_cell == predicted_cell:
                     this_correct_cells += 1
-    return this_total_cells, this_correct_cells
 def eval_model(model, filepath):
     global private_solutions

                     predicted_cell = prediction_table[house][column].lower().strip()
                 if truth_cell == predicted_cell:
                     this_correct_cells += 1
+    return this_total_cells, this_correct_cells, private_solutions[id]
 def eval_model(model, filepath):
     global private_solutions