Spaces:

loveblairsky
/

LLM-model-cards

Runtime error

App Files Files Community

Blair Yang committed on Feb 19

Commit

91143ec

•

1 Parent(s): de1d92a

now able to record responses

Browse files

Files changed (6) hide show

Sample.py +4 -2
__pycache__/Sample.cpython-311.pyc +0 -0
app.py +36 -2
responses/.DS_Store +0 -0
responses/mmlu/.DS_Store +0 -0
responses/mmlu/high_school_physics/response.csv +1 -0

Sample.py CHANGED Viewed

@@ -52,9 +52,10 @@ def sample_random_entry(dataset='', topic='', model='', n=1):
     # print(f"Sampling {n} random entries from {dataset} - {topic} - {model}")
     card_lst = sample_card(dataset, topic, model)
-    qa = sample_QA_entry(dataset, topic, model)
     display_dict, info_dict = process_for_display(card_lst, qa)
     return display_dict, info_dict
@@ -108,8 +109,9 @@ def sample_QA_entry(dataset='', topic='', model='', n=1):
     df = df[df['model'] == model]
     sample = df.sample(1)
     # Convert to dictionary
     sample = sample.to_dict(orient='records')[0]
-    return (sample)
 if __name__ == '__main__':
     sample_random_entry(n=5)

     # print(f"Sampling {n} random entries from {dataset} - {topic} - {model}")
     card_lst = sample_card(dataset, topic, model)
+    qa, index = sample_QA_entry(dataset, topic, model)
     display_dict, info_dict = process_for_display(card_lst, qa)
+    info_dict['index'] = index
     return display_dict, info_dict
     df = df[df['model'] == model]
     sample = df.sample(1)
     # Convert to dictionary
+    sample_idx = sample.index[0]
     sample = sample.to_dict(orient='records')[0]
+    return sample, sample_idx
 if __name__ == '__main__':
     sample_random_entry(n=5)

__pycache__/Sample.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/Sample.cpython-311.pyc and b/__pycache__/Sample.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -1,9 +1,26 @@
 import gradio as gr
 from Sample import sample_random_entry
 from Config import TOPICS
 info_dict = {}
 def sample_and_display(topic):
     # If a topic is selected, use it to sample a new entry
     global info_dict
@@ -18,7 +35,7 @@ def evaluate_guess(reasoning, correctness, confidence, topic):
     global info_dict
     # Here your logic will go to evaluate the guess
     # Placeholder for the correct logic to determine the correct answer
-    correct_answer = "Correctly"
     evaluation_response = "Correct" if correctness == correct_answer else "Incorrect"
     # Assuming info_dict is updated by sample_and_display function
@@ -27,6 +44,23 @@ def evaluate_guess(reasoning, correctness, confidence, topic):
     # Update the completion text
     completion_text = f"Completion: {actual_completion}\n\nChoice: {chr(info_dict.get('verdict', 0) + 65)}"
     return evaluation_response, actual_model, completion_text
 # Initial sampling
@@ -43,7 +77,7 @@ with gr.Blocks() as app:
         with gr.Column(scale=1):
             question = gr.Textbox(value=question_text, label="Question", interactive=False)
             reasoning = gr.Textbox(lines=5, placeholder="Your reasoning (optional)")
-            correctness = gr.Radio(choices=["Correct", "Incorrect"], label="I believe the model will answer this question")
             confidence = gr.Slider(minimum=0, maximum=10, step=1, label="Confidence")
             output_text = gr.Text(label="Evaluation Output")
             submit_button = gr.Button("Submit")

 import gradio as gr
 from Sample import sample_random_entry
 from Config import TOPICS
+import pandas as pd
+import os
+from threading import Lock
+lock = Lock()
 info_dict = {}
+def append_to_csv(output_path, row_data, header_names):
+    # Acquire the lock before accessing the file
+    with lock:
+        # Check if file exists and is not empty
+        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+            # File exists and is not empty, append without headers
+            df = pd.DataFrame([row_data])
+            df.to_csv(output_path, mode='a', header=False, index=False)
+        else:
+            # File does not exist or is empty, write with headers
+            df = pd.DataFrame([row_data], columns=header_names)
+            df.to_csv(output_path, mode='w', header=True, index=False)
 def sample_and_display(topic):
     # If a topic is selected, use it to sample a new entry
     global info_dict
     global info_dict
     # Here your logic will go to evaluate the guess
     # Placeholder for the correct logic to determine the correct answer
+    correct_answer = 'Correctly' if info_dict['correctness'] else 'Incorrectly'
     evaluation_response = "Correct" if correctness == correct_answer else "Incorrect"
     # Assuming info_dict is updated by sample_and_display function
     # Update the completion text
     completion_text = f"Completion: {actual_completion}\n\nChoice: {chr(info_dict.get('verdict', 0) + 65)}"
+    question_index = info_dict.get('index', -1)
+    question_topic = topic
+    output_path = f'responses/mmlu/{question_topic}/response.csv'
+    entry = dict()
+    entry['index'] = question_index
+    entry['model'] = actual_model
+    entry['reasoning'] = reasoning
+    entry['correctness'] = correctness == correct_answer
+    entry['confidence'] = confidence
+    header_names = ['index', 'model', 'reasoning', 'correctness', 'confidence']  # Add other headers as necessary
+    append_to_csv(output_path, entry, header_names)
     return evaluation_response, actual_model, completion_text
 # Initial sampling
         with gr.Column(scale=1):
             question = gr.Textbox(value=question_text, label="Question", interactive=False)
             reasoning = gr.Textbox(lines=5, placeholder="Your reasoning (optional)")
+            correctness = gr.Radio(choices=["Correct", "Incorrect"], label="I beplaceholderlieve the model will answer this question")
             confidence = gr.Slider(minimum=0, maximum=10, step=1, label="Confidence")
             output_text = gr.Text(label="Evaluation Output")
             submit_button = gr.Button("Submit")

responses/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

responses/mmlu/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

responses/mmlu/high_school_physics/response.csv ADDED Viewed

	@@ -0,0 +1 @@


1	+ index,model,reasoning,correctness,confidence