Spaces:
Running
Running
add truth data viewer
Browse files- app.py +4 -3
- data_utils.py +4 -1
- eval_utils.py +1 -1
app.py
CHANGED
@@ -104,7 +104,8 @@ def sample_explore_item(model_name, size_H, size_W):
|
|
104 |
puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
|
105 |
cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
|
106 |
model_eval_md = f"### π Evaluation:\n\n **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
|
107 |
-
|
|
|
108 |
|
109 |
|
110 |
def _tab_explore():
|
@@ -124,11 +125,11 @@ def _tab_explore():
|
|
124 |
puzzle_md = gr.Markdown("### π¦ Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
|
125 |
model_reasoning_md = gr.Markdown("### π€ Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
|
126 |
model_prediction_md = gr.Markdown("### π¬ Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
|
|
|
127 |
model_eval_md = gr.Markdown("### π Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
|
128 |
-
|
129 |
explore_button.click(fn=sample_explore_item,
|
130 |
inputs=[model_selection, size_H_selection, size_W_selection],
|
131 |
-
outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md])
|
132 |
|
133 |
|
134 |
|
|
|
104 |
puzzle_solved = explore_item['correct_cells'] == explore_item['total_cells']
|
105 |
cell_acc = explore_item["correct_cells"] / explore_item["total_cells"] * 100
|
106 |
model_eval_md = f"### π Evaluation:\n\n **Total Cells**: {explore_item['total_cells']} | **Correct Cells**: {explore_item['correct_cells']} | **Puzzle solved**: {puzzle_solved} | **Cell Acc**: {cell_acc:.2f}%"
|
107 |
+
turht_solution_md = f"### ✅ Truth Solution:\n\n{explore_item['truth_solution_table']}"
|
108 |
+
return puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md
|
109 |
|
110 |
|
111 |
def _tab_explore():
|
|
|
125 |
puzzle_md = gr.Markdown("### π¦ Puzzle: \n\nTo be loaded", elem_id="puzzle-md", elem_classes="box_md")
|
126 |
model_reasoning_md = gr.Markdown("### π€ Reasoning: \n\nTo be loaded", elem_id="model-reasoning-md", elem_classes="box_md")
|
127 |
model_prediction_md = gr.Markdown("### π¬ Answer: \n\nTo be loaded", elem_id="model-prediction-md", elem_classes="box_md")
|
128 |
+
turht_solution_md = gr.Markdown("### ✅ Truth Solution: \n\nTo be loaded", elem_id="truth-solution-md", elem_classes="box_md")
|
129 |
model_eval_md = gr.Markdown("### π Evaluation: \n\nTo be loaded", elem_id="model-eval-md", elem_classes="box_md")
|
|
|
130 |
explore_button.click(fn=sample_explore_item,
|
131 |
inputs=[model_selection, size_H_selection, size_W_selection],
|
132 |
+
outputs=[puzzle_md, model_reasoning_md, model_prediction_md, model_eval_md, turht_solution_md])
|
133 |
|
134 |
|
135 |
|
data_utils.py
CHANGED
@@ -92,6 +92,8 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
|
|
92 |
continue
|
93 |
if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
|
94 |
continue
|
|
|
|
|
95 |
prediction_reasoning = prediction_json.get("reasoning", "")
|
96 |
prediction_table = prediction_json["solution"]
|
97 |
if prediction_table is not None:
|
@@ -120,10 +122,11 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
|
|
120 |
table_md = tabulate(rows, headers=headers, tablefmt="github")
|
121 |
explore_item["solution_table_md"] = table_md
|
122 |
|
123 |
-
this_total_cells, this_correct_cells = eval_each_puzzle(explore_item["id"], prediction_table)
|
124 |
# print(table_md)
|
125 |
explore_item["correct_cells"] = this_correct_cells
|
126 |
explore_item["total_cells"] = this_total_cells
|
|
|
127 |
return explore_item
|
128 |
|
129 |
|
|
|
92 |
continue
|
93 |
if "child" in item["puzzle"].lower() or "mother" in item["puzzle"].lower():
|
94 |
continue
|
95 |
+
if "loves the spaghetti eater" in item["puzzle"].lower():
|
96 |
+
continue
|
97 |
prediction_reasoning = prediction_json.get("reasoning", "")
|
98 |
prediction_table = prediction_json["solution"]
|
99 |
if prediction_table is not None:
|
|
|
122 |
table_md = tabulate(rows, headers=headers, tablefmt="github")
|
123 |
explore_item["solution_table_md"] = table_md
|
124 |
|
125 |
+
this_total_cells, this_correct_cells, truth_solution_table = eval_each_puzzle(explore_item["id"], prediction_table)
|
126 |
# print(table_md)
|
127 |
explore_item["correct_cells"] = this_correct_cells
|
128 |
explore_item["total_cells"] = this_total_cells
|
129 |
+
explore_item["truth_solution_table"] = tabulate(truth_solution_table["rows"], headers=truth_solution_table["header"], tablefmt="github")
|
130 |
return explore_item
|
131 |
|
132 |
|
eval_utils.py
CHANGED
@@ -83,7 +83,7 @@ def eval_each_puzzle(id, prediction_table):
|
|
83 |
predicted_cell = prediction_table[house][column].lower().strip()
|
84 |
if truth_cell == predicted_cell:
|
85 |
this_correct_cells += 1
|
86 |
-
return this_total_cells, this_correct_cells
|
87 |
|
88 |
def eval_model(model, filepath):
|
89 |
global private_solutions
|
|
|
83 |
predicted_cell = prediction_table[house][column].lower().strip()
|
84 |
if truth_cell == predicted_cell:
|
85 |
this_correct_cells += 1
|
86 |
+
return this_total_cells, this_correct_cells, private_solutions[id]
|
87 |
|
88 |
def eval_model(model, filepath):
|
89 |
global private_solutions
|