Spaces:
Sleeping
Sleeping
elibrowne
committed on
Commit
•
a9fe74f
1
Parent(s):
f2c0ef6
Login feature, token estimates
Browse files- app.py +49 -23
- sample.csv +0 -0
- token_estimates.py +37 -0
app.py
CHANGED
@@ -28,7 +28,7 @@ def save_json(score1, score2):
|
|
28 |
json.dump({"relevance": score1, "novelty": score2, "datetime": datetime.now().isoformat()}, f)
|
29 |
f.write("\n")
|
30 |
|
31 |
-
# READING EXISTING DATA: this is used to read
|
32 |
|
33 |
"""
|
34 |
from datasets import load_dataset
|
@@ -52,12 +52,19 @@ passage_texts = ["The IDEA also requires states receiving federal funds to educa
|
|
52 |
"To assure that children attending public schools obtain a high quality education, the legislature shall make adequate provision to ensure that ... there are a sufficient number of classrooms ....",
|
53 |
"courts should not disturb a state's denial of IDEA reimbursement where the chief benefits of the chosen school are the kind of advantages that might be preferred by parents of any child, disabled or not. Rather, the unilateral private placement is only appropriate if it provides education instruction [specially ] designed to meet the unique needs of a handicapped child.",
|
54 |
"It is also likely that residents of the States will attend out-of-state schools that invoke the Exemptions, and that such students will seek contraceptive services through programs in their home states, also giving rise to fiscal injuries to the States that only a nationwide injunction can remedy.",
|
55 |
-
"Although state classifications based on alienage are generally suspect, a state may reserve a government position for citizens if it is related to self-governance, involves policymaking, or requires exercise of important discretionary power over citizens. In these cases, only a rationality test is used. A public school teacher at the primary and secondary school level performs an important governmental function (e.g., he influences students' attitudes about government, the political process, citizenship, etc.), and therefore the exclusion of aliens is rationally related to the state's interest in furthering educational goals. [Ambach v. Norwick (1979)]"
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
57 |
step = 0
|
58 |
|
59 |
# BLOCKS: main user interface
|
60 |
|
|
|
|
|
61 |
with gr.Blocks() as user_eval:
|
62 |
# Title text introducing study
|
63 |
gr.Markdown("""
|
@@ -115,25 +122,44 @@ with gr.Blocks() as user_eval:
|
|
115 |
|
116 |
# Question and answering dynamics
|
117 |
with gr.Row() as question:
|
118 |
-
|
119 |
-
gr.
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
# Starts on question, switches to evaluation after the user answers
|
139 |
-
user_eval.launch()
|
|
|
|
|
|
28 |
json.dump({"relevance": score1, "novelty": score2, "datetime": datetime.now().isoformat()}, f)
|
29 |
f.write("\n")
|
30 |
|
31 |
+
# READING EXISTING DATA: this is used to read questions
|
32 |
|
33 |
"""
|
34 |
from datasets import load_dataset
|
|
|
52 |
"To assure that children attending public schools obtain a high quality education, the legislature shall make adequate provision to ensure that ... there are a sufficient number of classrooms ....",
|
53 |
"courts should not disturb a state's denial of IDEA reimbursement where the chief benefits of the chosen school are the kind of advantages that might be preferred by parents of any child, disabled or not. Rather, the unilateral private placement is only appropriate if it provides education instruction [specially ] designed to meet the unique needs of a handicapped child.",
|
54 |
"It is also likely that residents of the States will attend out-of-state schools that invoke the Exemptions, and that such students will seek contraceptive services through programs in their home states, also giving rise to fiscal injuries to the States that only a nationwide injunction can remedy.",
|
55 |
+
"Although state classifications based on alienage are generally suspect, a state may reserve a government position for citizens if it is related to self-governance, involves policymaking, or requires exercise of important discretionary power over citizens. In these cases, only a rationality test is used. A public school teacher at the primary and secondary school level performs an important governmental function (e.g., he influences students' attitudes about government, the political process, citizenship, etc.), and therefore the exclusion of aliens is rationally related to the state's interest in furthering educational goals. [Ambach v. Norwick (1979)]",
|
56 |
+
"Passage 9",
|
57 |
+
"Passage 10"]
|
58 |
+
generation_4 = "Here is a generation at 4 passages."
|
59 |
+
generation_6 = "Here is a generation at 6 passages."
|
60 |
+
generation_10 = "Here is a generation at 10 passages."
|
61 |
+
gold_passage = "GOLD PASSAGE"
|
62 |
step = 0
|
63 |
|
64 |
# BLOCKS: main user interface
|
65 |
|
66 |
+
user_id = "NO_ID"
|
67 |
+
|
68 |
with gr.Blocks() as user_eval:
|
69 |
# Title text introducing study
|
70 |
gr.Markdown("""
|
|
|
122 |
|
123 |
# Question and answering dynamics
|
124 |
with gr.Row() as question:
    # Both panels are built up front and toggled with `visible`. A Python
    # `if user_id == ...` here would only run once, while the layout is being
    # constructed, so it cannot react to a participant logging in later.

    # Login panel: shown first, collects the participant's email.
    with gr.Column() as login_panel:
        gr.Markdown("---")
        gr.Markdown("# Enter email to start")
        gr.Markdown("Thank you so much for your participation in our study! Please enter your email — we're using it to keep track of which questions you've answered and which you haven't seen. Use the same email every time to keep your progress saved. :)")
        # `label=` names the field; a bare positional string would instead
        # pre-fill the box with the text "Email".
        email = gr.Textbox(label="Email")
        s = gr.Button("Start!")

    # Question panel: hidden until an email has been submitted.
    # NOTE(review): assumes question_text, answers_text and evals are defined
    # earlier in the file — confirm.
    with gr.Column(visible=False) as question_panel:
        gr.Markdown("---")
        gr.Markdown("**Question**")
        gr.Markdown(question_text)
        a = gr.Button(answers_text[0])
        b = gr.Button(answers_text[1])
        c = gr.Button(answers_text[2])
        d = gr.Button(answers_text[3])

    def submit_email(email_value):
        """Record the submitted email and swap the login panel for the question.

        Args:
            email_value: Current text of the `email` textbox, supplied by
                Gradio through `inputs=[email]`.

        Returns:
            A dict of component updates hiding the login panel and showing
            the question panel.

        Raises:
            gr.Error: If the textbox is empty or whitespace-only.
        """
        # NOTE(review): `user_id` is a module-level global, so it is shared by
        # every visitor served by this process; a per-session `gr.State`
        # would keep concurrent participants separate.
        global user_id
        cleaned = (email_value or "").strip()
        if not cleaned:
            raise gr.Error("Please enter your email to start.")
        user_id = cleaned
        return {
            login_panel: gr.Column(visible=False),
            question_panel: gr.Column(visible=True),
        }

    s.click(fn=submit_email, inputs=[email], outputs=[login_panel, question_panel])

    def answer():
        """Hide the question row and reveal the evaluation row."""
        return {
            question: gr.Row(visible=False),
            evals: gr.Row(visible=True),
        }

    # Every answer button advances to the evaluation step in the same way.
    for answer_button in (a, b, c, d):
        answer_button.click(fn=answer, outputs=[question, evals])
|
161 |
|
162 |
# Starts on question, switches to evaluation after the user answers
|
163 |
+
user_eval.launch()
|
164 |
+
|
165 |
+
# https://github.com/gradio-app/gradio/issues/5791
|
sample.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
token_estimates.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
import tiktoken
|
3 |
+
|
4 |
+
# Each entry is [question_text, passages_text]: the optional prompt joined to
# the question, and the ten passages joined with single spaces.
questions_and_passages = []

# newline="" is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly.
# NOTE(review): assumes sample.csv is UTF-8 encoded — confirm.
with open("sample.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    # Skip the header through the reader rather than the raw file, so a header
    # containing a quoted line break is consumed as a single record. The
    # default keeps an empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        if not row:
            continue  # csv.reader yields [] for blank lines
        if len(row) < 19:
            raise ValueError(
                f"sample.csv line {reader.line_num}: expected at least 19 "
                f"columns, found {len(row)}"
            )
        question = (row[1] + " " + row[2]).strip()  # optional prompt + question
        passages = " ".join(row[9:19])  # columns 9-18: the 10 passages
        questions_and_passages.append([question, passages])

if not questions_and_passages:
    # Without any rows the averages below would divide by zero.
    raise SystemExit("sample.csv contains no data rows; nothing to measure.")

enc = tiktoken.encoding_for_model("gpt-4o")

# Running totals (for the averages) and maxima, all in gpt-4o tokens.
question_tokens = 0
question_passage_tokens = 0
max_qt = 0
max_pt = 0
max_qpt = 0

for question, passages in questions_and_passages:
    qt = len(enc.encode(question))
    pt = len(enc.encode(passages))
    question_tokens += qt
    question_passage_tokens += qt + pt
    max_qt = max(max_qt, qt)
    max_pt = max(max_pt, pt)
    max_qpt = max(max_qpt, qt + pt)

num_entries = len(questions_and_passages)
print(f"Average question length, gpt-4o tokens: {question_tokens / num_entries}")
print(f"Longest question (tokens): {max_qt}")
print(f"Average question + 10 passages length, gpt-4o tokens: {question_passage_tokens / num_entries}")
print(f"Longest set of ten passages (tokens): {max_pt}")
print(f"Longest combination of question and passages: {max_qpt}")
|