Spaces:
Runtime error
Runtime error
Blair Yang
committed on
Commit
•
91143ec
1
Parent(s):
de1d92a
Now able to record responses
Browse files- Sample.py +4 -2
- __pycache__/Sample.cpython-311.pyc +0 -0
- app.py +36 -2
- responses/.DS_Store +0 -0
- responses/mmlu/.DS_Store +0 -0
- responses/mmlu/high_school_physics/response.csv +1 -0
Sample.py
CHANGED
@@ -52,9 +52,10 @@ def sample_random_entry(dataset='', topic='', model='', n=1):
|
|
52 |
|
53 |
# print(f"Sampling {n} random entries from {dataset} - {topic} - {model}")
|
54 |
card_lst = sample_card(dataset, topic, model)
|
55 |
-
qa = sample_QA_entry(dataset, topic, model)
|
56 |
|
57 |
display_dict, info_dict = process_for_display(card_lst, qa)
|
|
|
58 |
|
59 |
return display_dict, info_dict
|
60 |
|
@@ -108,8 +109,9 @@ def sample_QA_entry(dataset='', topic='', model='', n=1):
|
|
108 |
df = df[df['model'] == model]
|
109 |
sample = df.sample(1)
|
110 |
# Convert to dictionary
|
|
|
111 |
sample = sample.to_dict(orient='records')[0]
|
112 |
-
return
|
113 |
|
114 |
if __name__ == '__main__':
|
115 |
sample_random_entry(n=5)
|
|
|
52 |
|
53 |
# print(f"Sampling {n} random entries from {dataset} - {topic} - {model}")
|
54 |
card_lst = sample_card(dataset, topic, model)
|
55 |
+
qa, index = sample_QA_entry(dataset, topic, model)
|
56 |
|
57 |
display_dict, info_dict = process_for_display(card_lst, qa)
|
58 |
+
info_dict['index'] = index
|
59 |
|
60 |
return display_dict, info_dict
|
61 |
|
|
|
109 |
df = df[df['model'] == model]
|
110 |
sample = df.sample(1)
|
111 |
# Convert to dictionary
|
112 |
+
sample_idx = sample.index[0]
|
113 |
sample = sample.to_dict(orient='records')[0]
|
114 |
+
return sample, sample_idx
|
115 |
|
116 |
if __name__ == '__main__':
|
117 |
sample_random_entry(n=5)
|
__pycache__/Sample.cpython-311.pyc
CHANGED
Binary files a/__pycache__/Sample.cpython-311.pyc and b/__pycache__/Sample.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -1,9 +1,26 @@
|
|
1 |
import gradio as gr
|
2 |
from Sample import sample_random_entry
|
3 |
from Config import TOPICS
|
|
|
|
|
|
|
4 |
|
|
|
5 |
info_dict = {}
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
def sample_and_display(topic):
|
8 |
# If a topic is selected, use it to sample a new entry
|
9 |
global info_dict
|
@@ -18,7 +35,7 @@ def evaluate_guess(reasoning, correctness, confidence, topic):
|
|
18 |
global info_dict
|
19 |
# Here your logic will go to evaluate the guess
|
20 |
# Placeholder for the correct logic to determine the correct answer
|
21 |
-
correct_answer =
|
22 |
evaluation_response = "Correct" if correctness == correct_answer else "Incorrect"
|
23 |
|
24 |
# Assuming info_dict is updated by sample_and_display function
|
@@ -27,6 +44,23 @@ def evaluate_guess(reasoning, correctness, confidence, topic):
|
|
27 |
|
28 |
# Update the completion text
|
29 |
completion_text = f"Completion: {actual_completion}\n\nChoice: {chr(info_dict.get('verdict', 0) + 65)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
return evaluation_response, actual_model, completion_text
|
31 |
|
32 |
# Initial sampling
|
@@ -43,7 +77,7 @@ with gr.Blocks() as app:
|
|
43 |
with gr.Column(scale=1):
|
44 |
question = gr.Textbox(value=question_text, label="Question", interactive=False)
|
45 |
reasoning = gr.Textbox(lines=5, placeholder="Your reasoning (optional)")
|
46 |
-
correctness = gr.Radio(choices=["Correct", "Incorrect"], label="I
|
47 |
confidence = gr.Slider(minimum=0, maximum=10, step=1, label="Confidence")
|
48 |
output_text = gr.Text(label="Evaluation Output")
|
49 |
submit_button = gr.Button("Submit")
|
|
|
1 |
import gradio as gr
|
2 |
from Sample import sample_random_entry
|
3 |
from Config import TOPICS
|
4 |
+
import pandas as pd
|
5 |
+
import os
|
6 |
+
from threading import Lock
|
7 |
|
8 |
+
# Guards the response CSV files so the "is the file empty?" check and the
# write that follows are not interleaved between concurrent callers.
lock = Lock()
info_dict = {}


def append_to_csv(output_path, row_data, header_names):
    """Append one row to a CSV file, creating the file (and folder) if needed.

    Args:
        output_path: Path of the CSV file to write to.
        row_data: Mapping of column name to value for the new row.
        header_names: Column names, in the order they appear in the file.

    The header row is written only when the file is missing or empty;
    otherwise the row is appended below the existing content.
    """
    # Always project the row onto header_names: an appended row must follow
    # the header's column order, not the insertion order of row_data's keys.
    df = pd.DataFrame([row_data], columns=header_names)

    # Acquire the lock before inspecting or touching the file
    with lock:
        # to_csv refuses to write into a directory that does not exist yet
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        has_content = os.path.exists(output_path) and os.path.getsize(output_path) > 0
        if has_content:
            # File already has a header: append the data row only
            df.to_csv(output_path, mode='a', header=False, index=False)
        else:
            # New or empty file: start it with the header row
            df.to_csv(output_path, mode='w', header=True, index=False)
|
23 |
+
|
24 |
def sample_and_display(topic):
|
25 |
# If a topic is selected, use it to sample a new entry
|
26 |
global info_dict
|
|
|
35 |
global info_dict
|
36 |
# Here your logic will go to evaluate the guess
|
37 |
# Placeholder for the correct logic to determine the correct answer
|
38 |
+
correct_answer = 'Correctly' if info_dict['correctness'] else 'Incorrectly'
|
39 |
evaluation_response = "Correct" if correctness == correct_answer else "Incorrect"
|
40 |
|
41 |
# Assuming info_dict is updated by sample_and_display function
|
|
|
44 |
|
45 |
# Update the completion text
|
46 |
completion_text = f"Completion: {actual_completion}\n\nChoice: {chr(info_dict.get('verdict', 0) + 65)}"
|
47 |
+
|
48 |
+
question_index = info_dict.get('index', -1)
|
49 |
+
question_topic = topic
|
50 |
+
output_path = f'responses/mmlu/{question_topic}/response.csv'
|
51 |
+
entry = dict()
|
52 |
+
|
53 |
+
entry['index'] = question_index
|
54 |
+
entry['model'] = actual_model
|
55 |
+
entry['reasoning'] = reasoning
|
56 |
+
entry['correctness'] = correctness == correct_answer
|
57 |
+
entry['confidence'] = confidence
|
58 |
+
|
59 |
+
header_names = ['index', 'model', 'reasoning', 'correctness', 'confidence'] # Add other headers as necessary
|
60 |
+
|
61 |
+
append_to_csv(output_path, entry, header_names)
|
62 |
+
|
63 |
+
|
64 |
return evaluation_response, actual_model, completion_text
|
65 |
|
66 |
# Initial sampling
|
|
|
77 |
with gr.Column(scale=1):
|
78 |
question = gr.Textbox(value=question_text, label="Question", interactive=False)
|
79 |
reasoning = gr.Textbox(lines=5, placeholder="Your reasoning (optional)")
|
80 |
+
correctness = gr.Radio(choices=["Correct", "Incorrect"], label="I beplaceholderlieve the model will answer this question")
|
81 |
confidence = gr.Slider(minimum=0, maximum=10, step=1, label="Confidence")
|
82 |
output_text = gr.Text(label="Evaluation Output")
|
83 |
submit_button = gr.Button("Submit")
|
responses/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
responses/mmlu/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
responses/mmlu/high_school_physics/response.csv
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
index,model,reasoning,correctness,confidence
|