HansBug committed on
Commit
9f8187d
1 Parent(s): 794ea91

dev(hansbug): push

Browse files
llmriddles/questions/__init__.py CHANGED
@@ -1,7 +1,9 @@
1
  from .executor import QuestionExecutor
2
  from .level1 import __file__ as _level1_file_
 
3
  from .level3 import __file__ as _level3_file_
4
  from .question import Question, register_question, list_ordered_questions
5
 
6
  _ = _level1_file_
 
7
  _ = _level3_file_
 
1
  from .executor import QuestionExecutor
2
  from .level1 import __file__ as _level1_file_
3
+ from .level2 import __file__ as _level2_file_
4
  from .level3 import __file__ as _level3_file_
5
  from .question import Question, register_question, list_ordered_questions
6
 
7
  _ = _level1_file_
8
+ _ = _level2_file_
9
  _ = _level3_file_
llmriddles/questions/level2.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Optional, Tuple
3
+
4
+ import sympy
5
+
6
+ from .question import register_question
7
+
8
# Riddle 1 prompt, shown to the player in Chinese and in English.
# The leading/trailing newlines are part of the text and are kept as-is.
CN_TEXT_1 = """
你需要提出一个字数是质数的问题,使回答的长度刚好是它的下一个质数。
"""
EN_TEXT_1 = """
You need to come up with a question that has a prime number of words, so the answer's length is exactly the next prime number.
"""
14
+
15
+
16
def _is_prime(v):
    """Tell whether ``v`` is prime by delegating to ``sympy.isprime``."""
    primality = sympy.isprime(v)
    return primality
18
+
19
+
20
def _next_prime(v):
    """
    Return the smallest prime strictly greater than ``v``.

    The search simply walks upward from ``v + 1`` and tests each candidate with
    ``_is_prime``. Unlike a ``while v:`` loop, it does not stop when the running
    value happens to be ``0``, so inputs such as ``0`` or ``-1`` yield ``2``
    instead of silently falling through and returning ``None``.

    :param v: Integer to start searching after.
    :return: The first prime number greater than ``v``.
    """
    candidate = v + 1
    while not _is_prime(candidate):
        candidate += 1
    return candidate
25
+
26
+
27
def _cn_checker_1(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
    """
    Chinese checker for riddle 1, measured in characters.

    The stripped question must have a prime number of characters, and the
    answer (not stripped) must have exactly as many characters as the next prime.

    :param question_text: Riddle prompt (unused by this checker).
    :param user_text: Question typed by the player.
    :param answer_text: Answer produced by the model.
    :return: ``(passed, explanation)``; the explanation is ``None`` on success.
    """
    question_len = len(user_text.strip())
    if _is_prime(question_len):
        answer_len = len(answer_text)
        expected_len = _next_prime(question_len)
        if answer_len == expected_len:
            return True, None
        return False, f'下一个质数为{expected_len},但回答长度为{answer_len}'

    return False, f'问题长度为{question_len},非质数'
38
+
39
+
40
+ def _en_words(text: str):
41
+ return len(re.findall(r'\w+', text))
42
+
43
+
44
def _en_checker_1(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
    """
    English checker for riddle 1, measured in words.

    The question must contain a prime number of words, and the answer must
    contain exactly as many words as the next prime after that.

    :param question_text: Riddle prompt (unused by this checker).
    :param user_text: Question typed by the player.
    :param answer_text: Answer produced by the model.
    :return: ``(passed, explanation)``; the explanation is ``None`` on success.
    """
    question_words = _en_words(user_text.strip())
    if _is_prime(question_words):
        answer_words = _en_words(answer_text)
        expected_words = _next_prime(question_words)
        if answer_words == expected_words:
            return True, None
        return False, f'The next prime number is {expected_words}, but the answer\'s length is {answer_words}'

    return False, f'The question has a length of {question_words}, which is not a prime number'
55
+
56
+
57
# Register riddle 1: one prompt text and one checker per language.
register_question(
    {'cn': CN_TEXT_1, 'en': EN_TEXT_1},
    checkers={'cn': _cn_checker_1, 'en': _en_checker_1},
)

# Riddle 2 prompt, shown to the player in Chinese and in English.
CN_TEXT_2 = """
请输入一个大于一的正整数作为问题,使回答里包含至少比它大一千的数。
"""
EN_TEXT_2 = """
Please enter a positive integer greater than one as a question, so that the answer contains a number that is at least one thousand more than it.
"""
71
+
72
+
73
+ def _cn_checker_2(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
74
+ try:
75
+ value = int(user_text.strip())
76
+ if value <= 1:
77
+ raise ValueError
78
+ except (TypeError, ValueError):
79
+ return False, f'输入内容{user_text!r},并非一个大于1的正整数'
80
+
81
+ for value_item in re.findall('[-+]?\d+', answer_text):
82
+ value_item = int(value_item)
83
+ if value_item >= value + 1000:
84
+ return True, f'检测到输出中数字{value_item},满足要求'
85
+
86
+ return False, f'未在输出中检测到不少于{value + 1000}的数字'
87
+
88
+
89
+ def _en_checker_2(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
90
+ try:
91
+ value = int(user_text.strip())
92
+ if value <= 1:
93
+ raise ValueError
94
+ except (TypeError, ValueError):
95
+ return False, f'You entered {user_text!r}, which is not a positive integer greater than 1'
96
+
97
+ for value_item in re.findall('[-+]?\d+', answer_text):
98
+ value_item = int(value_item)
99
+ if value_item >= value + 1000:
100
+ return True, f'Detected the number {value_item} in the output, which meets the requirement'
101
+
102
+ return False, f'Did not detect a number of at least {value + 1000} in the output'
103
+
104
+
105
# Register riddle 2: one prompt text and one checker per language.
register_question(
    {'cn': CN_TEXT_2, 'en': EN_TEXT_2},
    checkers={'cn': _cn_checker_2, 'en': _en_checker_2},
)

# Riddle 3 prompt, shown to the player in Chinese and in English.
CN_TEXT_3 = """
请输入一个大于一的正整数作为问题,使回答里包含至少10个不同的至少比它小一千的自然数。
"""
EN_TEXT_3 = """
Please enter a positive integer greater than one as a question, so that the answer contains at least 10 different natural numbers, each of them at least 1000 less than the question.
"""
119
+
120
+
121
+ def _cn_checker_3(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
122
+ try:
123
+ value = int(user_text.strip())
124
+ if value <= 1:
125
+ raise ValueError
126
+ except (TypeError, ValueError):
127
+ return False, f'输入内容{user_text!r},并非一个大于1的正整数'
128
+
129
+ collected_values = []
130
+ for value_item in re.findall('[-+]?\d+', answer_text):
131
+ value_item = int(value_item)
132
+ if value_item <= value - 1000:
133
+ collected_values.append(value_item)
134
+
135
+ collected_values = sorted(set(collected_values))
136
+ if len(collected_values) >= 10:
137
+ return True, f'检测到{len(collected_values)}个不大于{value - 1000}的数字:{collected_values!r}'
138
+ else:
139
+ return False, f'检测到{len(collected_values)}个不大于{value - 1000}的数字,未达到10个:{collected_values!r}'
140
+
141
+
142
+ def _en_checker_3(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
143
+ try:
144
+ value = int(user_text.strip())
145
+ if value <= 1:
146
+ raise ValueError
147
+ except (TypeError, ValueError):
148
+ return False, f'You entered {user_text!r}, which is not a positive integer greater than 1'
149
+
150
+ collected_values = []
151
+ for value_item in re.findall('[-+]?\d+', answer_text):
152
+ value_item = int(value_item)
153
+ if value_item <= value - 1000:
154
+ collected_values.append(value_item)
155
+
156
+ collected_values = sorted(set(collected_values))
157
+ if len(collected_values) >= 10:
158
+ return True, f'Detected {len(collected_values)} numbers not greater than {value - 1000}: {collected_values!r}'
159
+ else:
160
+ return False, f'Detected {len(collected_values)} numbers not greater than {value - 1000}, ' \
161
+ f'but it doesn\'t reach 10: {collected_values!r}'
162
+
163
+
164
# Register riddle 3: one prompt text and one checker per language.
register_question(
    {'cn': CN_TEXT_3, 'en': EN_TEXT_3},
    checkers={'cn': _cn_checker_3, 'en': _en_checker_3},
)

# Riddle 4 prompt, shown to the player in Chinese and in English.
CN_TEXT_4 = """
请在不提及1,4,5这三个字符的情况下让模型回答114514,不要有多余的字符。
"""
EN_TEXT_4 = """
Please make the model answer "114514" without mentioning the characters 1, 4, and 5, and without any extra characters.
"""
178
+
179
+
180
+ def _cn_checker_4(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
181
+ if "1" in user_text or "4" in user_text or "5" in user_text:
182
+ return False, '请不要在提问中出现1、4、5三个数字'
183
+
184
+ return answer_text.strip() == '114514', None
185
+
186
+
187
+ def _en_checker_4(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
188
+ if "1" in user_text or "4" in user_text or "5" in user_text:
189
+ return False, 'Please don\'t include the numbers 1, 4, and 5 in the question'
190
+
191
+ return answer_text.strip() == '114514', None
192
+
193
+
194
# Register riddle 4: one prompt text and one checker per language.
register_question(
    {'cn': CN_TEXT_4, 'en': EN_TEXT_4},
    checkers={'cn': _cn_checker_4, 'en': _en_checker_4},
)

# Riddle 5 prompt, shown to the player in Chinese and in English.
CN_TEXT_5 = """
请输入一个大于一的平方数作为问题,使回答里包含它的平方根对应的数。
"""
EN_TEXT_5 = """
Please provide a square number greater than one as the question, and the answer should include the number corresponding to its square root.
"""
208
+
209
+
210
+ def _cn_checker_5(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
211
+ try:
212
+ value = int(user_text.strip())
213
+ if value <= 1:
214
+ raise ValueError
215
+ if int(value ** 0.5) ** 2 != value:
216
+ raise ValueError
217
+ except (TypeError, ValueError):
218
+ return False, f'输入内容{user_text!r},并非一个大于1的平方数'
219
+
220
+ sq = int(value ** 0.5)
221
+ for value_item in re.findall('[-+]?\d+', answer_text):
222
+ value_item = int(value_item)
223
+ if value_item == sq:
224
+ return True, f'检测到平方根数{sq}'
225
+ else:
226
+ return False, f'未检测到平方根数{sq}'
227
+
228
+
229
+ def _en_checker_5(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
230
+ try:
231
+ value = int(user_text.strip())
232
+ if value <= 1:
233
+ raise ValueError
234
+ if int(value ** 0.5) ** 2 != value:
235
+ raise ValueError
236
+ except (TypeError, ValueError):
237
+ return False, f'The input, {user_text!r}, is not a square number greater than 1'
238
+
239
+ sq = int(value ** 0.5)
240
+ for value_item in re.findall('[-+]?\d+', answer_text):
241
+ value_item = int(value_item)
242
+ if value_item == sq:
243
+ return True, f'Detected square root: {sq}'
244
+ else:
245
+ return False, f'Square root {sq} not detected'
246
+
247
+
248
# Register riddle 5: one prompt text and one checker per language.
register_question(
    {'cn': CN_TEXT_5, 'en': EN_TEXT_5},
    checkers={'cn': _cn_checker_5, 'en': _en_checker_5},
)

# Riddle 6 prompt, shown to the player in Chinese and in English.
CN_TEXT_6 = """
请提一个不包含“狗”这个字的10个字以内的问题,使回答中“狗”这个字出现的次数至少是问题字数的2倍。
"""
EN_TEXT_6 = """
Please ask a question in under 10 words without the word "dog" and ensure that the word "dog" appears at least twice in the answer for each word in the question.
"""
262
+
263
+
264
+ def _cn_checker_6(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
265
+ if len(user_text) > 10:
266
+ return False, '问题不得超过10个字'
267
+ if '狗' in user_text:
268
+ return False, '问题不得包含“狗”字'
269
+
270
+ dog_count = len(re.findall('狗', answer_text))
271
+ if dog_count >= len(user_text) * 2:
272
+ return True, f'“狗”字的出现次数为{dog_count}次'
273
+ else:
274
+ return False, f'“狗”字的出现次数为{dog_count}次,未达到{len(user_text) * 2}次'
275
+
276
+
277
+ def _en_checker_6(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
278
+ q_words = re.findall(r'\w+', user_text.lower())
279
+ if len(q_words) > 10:
280
+ return False, 'The question must not exceed 10 words'
281
+ if any(word in {'dog', 'dogs'} for word in q_words):
282
+ return False, 'The question must not contain the word "dog" or "dogs"'
283
+
284
+ a_words = re.findall(r'\w+', answer_text.lower())
285
+ a_dog_count = sum(1 if word in {'dog', 'dogs'} else 0 for word in a_words)
286
+ if a_dog_count >= len(q_words) * 2:
287
+ return True, f'The word "dog" (or "dogs") appears {a_dog_count} times.'
288
+ else:
289
+ return False, f'The word "dog" (or "dogs") appears {a_dog_count} times, ' \
290
+ f'which is less than {len(q_words) * 2} times.'
291
+
292
+
293
# Register riddle 6: one prompt text and one checker per language.
register_question(
    {'cn': CN_TEXT_6, 'en': EN_TEXT_6},
    checkers={'cn': _cn_checker_6, 'en': _en_checker_6},
)
llmriddles/questions/question.py CHANGED
@@ -23,8 +23,8 @@ def register_question(text: Union[Mapping[str, str], str],
23
  if isinstance(checkers, collections.abc.Mapping):
24
  _origin_checkers = checkers
25
 
26
- def _integrated_checker(question_text: str, answer_text: str, lang: str):
27
- return _origin_checkers[lang](question_text, answer_text)
28
 
29
  checker: MultiLangCheckerTyping = _integrated_checker
30
  else:
 
23
  if isinstance(checkers, collections.abc.Mapping):
24
  _origin_checkers = checkers
25
 
26
+ def _integrated_checker(question_text: str, user_text: str, answer_text: str, lang: str):
27
+ return _origin_checkers[lang](question_text, user_text, answer_text)
28
 
29
  checker: MultiLangCheckerTyping = _integrated_checker
30
  else:
requirements.txt CHANGED
@@ -2,4 +2,5 @@ hbutils>=0.9.1
2
  tqdm
3
  requests>=2.20
4
  gradio==4.1.1
5
- openai>=1
 
 
2
  tqdm
3
  requests>=2.20
4
  gradio==4.1.1
5
+ openai>=1
6
+ sympy