elibrowne committed on
Commit
a9fe74f
1 Parent(s): f2c0ef6

Login feature, token estimates

Browse files
Files changed (3) hide show
  1. app.py +49 -23
  2. sample.csv +0 -0
  3. token_estimates.py +37 -0
app.py CHANGED
@@ -28,7 +28,7 @@ def save_json(score1, score2):
28
  json.dump({"relevance": score1, "novelty": score2, "datetime": datetime.now().isoformat()}, f)
29
  f.write("\n")
30
 
31
- # READING EXISTING DATA: this is used to read questionss
32
 
33
  """
34
  from datasets import load_dataset
@@ -52,12 +52,19 @@ passage_texts = ["The IDEA also requires states receiving federal funds to educa
52
  "To assure that children attending public schools obtain a high quality education, the legislature shall make adequate provision to ensure that ... there are a sufficient number of classrooms ....",
53
  "courts should not disturb a state's denial of IDEA reimbursement where the chief benefits of the chosen school are the kind of advantages that might be preferred by parents of any child, disabled or not. Rather, the unilateral private placement is only appropriate if it provides education instruction [specially ] designed to meet the unique needs of a handicapped child.",
54
  "It is also likely that residents of the States will attend out-of-state schools that invoke the Exemptions, and that such students will seek contraceptive services through programs in their home states, also giving rise to fiscal injuries to the States that only a nationwide injunction can remedy.",
55
- "Although state classifications based on alienage are generally suspect, a state may reserve a government position for citizens if it is related to self-governance, involves policymaking, or requires exercise of important discretionary power over citizens. In these cases, only a rationality test is used. A public school teacher at the primary and secondary school level performs an important governmental function (e.g., he influences students' attitudes about government, the political process, citizenship, etc.), and therefore the exclusion of aliens is rationally related to the state's interest in furthering educational goals. [Ambach v. Norwick (1979)]"
56
- ]
 
 
 
 
 
57
  step = 0
58
 
59
  # BLOCKS: main user interface
60
 
 
 
61
  with gr.Blocks() as user_eval:
62
  # Title text introducing study
63
  gr.Markdown("""
@@ -115,25 +122,44 @@ with gr.Blocks() as user_eval:
115
 
116
  # Question and answering dynamics
117
  with gr.Row() as question:
118
- with gr.Column():
119
- gr.Markdown("---")
120
- gr.Markdown("**Question**")
121
- gr.Markdown(question_text)
122
- a = gr.Button(answers_text[0])
123
- b = gr.Button(answers_text[1])
124
- c = gr.Button(answers_text[2])
125
- d = gr.Button(answers_text[3])
126
-
127
- def answer():
128
- return {
129
- question: gr.Row(visible = False),
130
- evals: gr.Row(visible = True)
131
- }
132
-
133
- a.click(fn = answer, outputs = [question, evals])
134
- b.click(fn = answer, outputs = [question, evals])
135
- c.click(fn = answer, outputs = [question, evals])
136
- d.click(fn = answer, outputs = [question, evals])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  # Starts on question, switches to evaluation after the user answers
139
- user_eval.launch()
 
 
 
28
  json.dump({"relevance": score1, "novelty": score2, "datetime": datetime.now().isoformat()}, f)
29
  f.write("\n")
30
 
31
+ # READING EXISTING DATA: this is used to read questions
32
 
33
  """
34
  from datasets import load_dataset
 
52
  "To assure that children attending public schools obtain a high quality education, the legislature shall make adequate provision to ensure that ... there are a sufficient number of classrooms ....",
53
  "courts should not disturb a state's denial of IDEA reimbursement where the chief benefits of the chosen school are the kind of advantages that might be preferred by parents of any child, disabled or not. Rather, the unilateral private placement is only appropriate if it provides education instruction [specially ] designed to meet the unique needs of a handicapped child.",
54
  "It is also likely that residents of the States will attend out-of-state schools that invoke the Exemptions, and that such students will seek contraceptive services through programs in their home states, also giving rise to fiscal injuries to the States that only a nationwide injunction can remedy.",
55
+ "Although state classifications based on alienage are generally suspect, a state may reserve a government position for citizens if it is related to self-governance, involves policymaking, or requires exercise of important discretionary power over citizens. In these cases, only a rationality test is used. A public school teacher at the primary and secondary school level performs an important governmental function (e.g., he influences students' attitudes about government, the political process, citizenship, etc.), and therefore the exclusion of aliens is rationally related to the state's interest in furthering educational goals. [Ambach v. Norwick (1979)]",
56
+ "Passage 9",
57
+ "Passage 10"]
58
+ generation_4 = "Here is a generation at 4 passages."
59
+ generation_6 = "Here is a generation at 6 passages."
60
+ generation_10 = "Here is a generation at 10 passages."
61
+ gold_passage = "GOLD PASSAGE"
62
  step = 0
63
 
64
  # BLOCKS: main user interface
65
 
66
+ user_id = "NO_ID"
67
+
68
  with gr.Blocks() as user_eval:
69
  # Title text introducing study
70
  gr.Markdown("""
 
122
 
123
  # Question and answering dynamics
124
  with gr.Row() as question:
125
+ if user_id == "NO_ID":
126
+ with gr.Column():
127
+ gr.Markdown("---")
128
+ gr.Markdown("# Enter email to start")
129
+ gr.Markdown("Thank you so much for your participation in our study! Please enter your email — we're using it to keep track of which questions you've answered and which you haven't seen. Use the same email every time to keep your progress saved. :)")
130
+ email = gr.Textbox("Email")
131
+ s = gr.Button("Start!")
132
+
133
+ def submit_email():
134
+ global user_id
135
+ user_id = email
136
+ return {
137
+ question: gr.Row(visible = True),
138
+ }
139
+
140
+ s.click(fn = submit_email, outputs = [question])
141
+ else:
142
+ with gr.Column():
143
+ gr.Markdown("---")
144
+ gr.Markdown("**Question**")
145
+ gr.Markdown(question_text)
146
+ a = gr.Button(answers_text[0])
147
+ b = gr.Button(answers_text[1])
148
+ c = gr.Button(answers_text[2])
149
+ d = gr.Button(answers_text[3])
150
+
151
+ def answer():
152
+ return {
153
+ question: gr.Row(visible = False),
154
+ evals: gr.Row(visible = True)
155
+ }
156
+
157
+ a.click(fn = answer, outputs = [question, evals])
158
+ b.click(fn = answer, outputs = [question, evals])
159
+ c.click(fn = answer, outputs = [question, evals])
160
+ d.click(fn = answer, outputs = [question, evals])
161
 
162
  # Starts on question, switches to evaluation after the user answers
163
+ user_eval.launch()
164
+
165
+ # https://github.com/gradio-app/gradio/issues/5791
sample.csv ADDED
The diff for this file is too large to render. See raw diff
 
token_estimates.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Estimate gpt-4o token usage for the questions and passages in sample.csv.

For every data row the script builds two strings:

* the question: column 1 (optional prompt) + " " + column 2 (question),
  stripped of surrounding whitespace;
* the passages: columns 9 through 18 (ten passages) joined by single spaces.

It then prints the average and maximum token counts for the questions, the
passages, and their combination, using the tiktoken encoding for "gpt-4o".
"""
import csv

import tiktoken

CSV_PATH = "sample.csv"

# Column layout of sample.csv as consumed by this script.
PROMPT_COL = 1          # optional prompt
QUESTION_COL = 2        # question text
FIRST_PASSAGE_COL = 9   # first of the ten passages
NUM_PASSAGES = 10
MIN_COLUMNS = FIRST_PASSAGE_COL + NUM_PASSAGES

questions_and_passages = []

# newline="" lets the csv module handle line breaks inside quoted fields.
with open(CSV_PATH, newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    # Skip the header as a CSV *record* (not a raw file line), so a header
    # containing a quoted newline is skipped in full; the default avoids a
    # StopIteration on an empty file.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields [] for completely blank lines.
            continue
        if len(row) < MIN_COLUMNS:
            raise ValueError(
                f"{CSV_PATH}: record ending on line {reader.line_num} has "
                f"{len(row)} columns, expected at least {MIN_COLUMNS}"
            )
        question = (row[PROMPT_COL] + " " + row[QUESTION_COL]).strip()
        passages = " ".join(row[FIRST_PASSAGE_COL:MIN_COLUMNS])
        questions_and_passages.append([question, passages])

if not questions_and_passages:
    # Without this guard the averages below would divide by zero.
    raise SystemExit(f"No data rows found in {CSV_PATH}; nothing to estimate.")

enc = tiktoken.encoding_for_model("gpt-4o")

question_tokens = 0           # running total of question tokens
question_passage_tokens = 0   # running total of question + passage tokens
max_qt = 0                    # longest question
max_pt = 0                    # longest set of ten passages
max_qpt = 0                   # longest question + passages combination

for question, passages in questions_and_passages:
    qt = len(enc.encode(question))
    pt = len(enc.encode(passages))
    question_tokens += qt
    question_passage_tokens += qt + pt
    max_qt = max(max_qt, qt)
    max_pt = max(max_pt, pt)
    max_qpt = max(max_qpt, qt + pt)

entry_count = len(questions_and_passages)

print("Average question length, gpt-4o tokens: " + str(question_tokens / entry_count))
print("Longest question (tokens): " + str(max_qt))
print("Average question + 10 passages length, gpt-4o tokens: " + str(question_passage_tokens / entry_count))
print("Longest set of ten passages (tokens): " + str(max_pt))
print("Longest combination of question and passages: " + str(max_qpt))