DevBM committed on
Commit
84c3fd4
1 Parent(s): 5e04f07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -63
app.py CHANGED
@@ -19,21 +19,25 @@ from sense2vec import Sense2Vec
19
  import sense2vec
20
  from wordcloud import WordCloud
21
  import matplotlib.pyplot as plt
 
 
 
 
 
 
 
22
  print("***************************************************************")
23
 
24
  st.set_page_config(
25
  page_title="Question Generator",
26
  initial_sidebar_state="collapsed",
27
  )
28
- # Load spaCy model
29
- nlp = spacy.load("en_core_web_md")
30
- # s2v = Sense2Vec.from_disk(self=Sense2Vec,path='s2v_old')
31
 
32
- s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
33
  # Initialize Wikipedia API with a user agent
34
  user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
35
  wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
36
 
 
37
  @st.cache_resource
38
  def load_model():
39
  model_name = "DevBM/t5-large-squad"
@@ -41,6 +45,46 @@ def load_model():
41
  tokenizer = T5Tokenizer.from_pretrained(model_name)
42
  return model, tokenizer
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # Function to extract keywords using combined techniques
45
  def extract_keywords(text, extract_all):
46
  doc = nlp(text)
@@ -162,8 +206,10 @@ def generate_question(context, answer, num_beams):
162
 
163
  # Function to export questions to CSV
164
  def export_to_csv(data):
165
- df = pd.DataFrame(data, columns=["Context", "Answer", "Question", "Options"])
166
- csv = df.to_csv(index=False,encoding='utf-8')
 
 
167
  return csv
168
 
169
  # Function to export questions to PDF
@@ -172,14 +218,15 @@ def export_to_pdf(data):
172
  pdf.add_page()
173
  pdf.set_font("Arial", size=12)
174
 
175
- for context, answer, question, options in data:
176
- pdf.multi_cell(0, 10, f"Context: {context}")
177
- pdf.multi_cell(0, 10, f"Answer: {answer}")
178
- pdf.multi_cell(0, 10, f"Question: {question}")
 
 
179
  pdf.ln(10)
180
 
181
- # pdf.output("questions.pdf")
182
- return pdf.output(name='questions.pdf',dest='S').encode('latin1')
183
 
184
  def display_word_cloud(generated_questions):
185
  word_frequency = {}
@@ -194,74 +241,154 @@ def display_word_cloud(generated_questions):
194
  plt.axis('off')
195
  st.pyplot()
196
 
197
- if 'data' not in st.session_state:
198
- st.session_state.data = None
199
-
200
- # Streamlit interface
201
- st.title(":blue[Question Generator from Text]")
202
- text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
203
-
204
- with st.sidebar:
205
- st.subheader("Customization Options")
206
- # Customization options
207
- num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
208
- context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
209
- num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
210
- with st.expander("Choose the Additional Elements to show"):
211
- show_context = st.checkbox("Context",True)
212
- show_answer = st.checkbox("Answer",True)
213
- show_options = st.checkbox("Options",False)
214
- show_entity_link = st.checkbox("Enitity Link For Wikipedia",True)
215
- extract_all_keywords = st.toggle("Extract max Keywords",value=False)
216
-
217
- if st.button("Generate Questions"):
218
- if text:
219
- model, tokenizer = load_model()
220
- keywords = extract_keywords(text,extract_all_keywords)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  print(f"\n\nFinal Keywords in Main Function: {keywords}\n\n")
222
  keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
223
-
224
- st.subheader("Generated Questions:",divider='blue')
225
- data = []
226
  for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
227
  if i >= num_questions:
228
  break
229
- linked_entity = entity_linking(keyword)
230
  question = generate_question(context, keyword, num_beams=num_beams)
231
- options = generate_options(keyword, context)
232
- st.subheader(body=f":orange[Q{i+1}:] {question}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  if show_context is True:
235
- st.write(f"**Context:** {context}")
236
  if show_answer is True:
237
- st.write(f"**Answer:** {keyword}")
238
  if show_options is True:
239
  st.write(f"**Options:**")
240
- for j, option in enumerate(options):
241
  st.write(f"{chr(65+j)}. {option}")
242
  if show_entity_link is True:
 
243
  if linked_entity:
244
  st.write(f"**Entity Link:** {linked_entity}")
245
- st.write("---")
246
- data.append((context, keyword, question, options))
 
 
 
 
 
 
 
 
 
 
 
247
 
248
- # Add the data to session state
249
- st.session_state.data = data
250
- # display_word_cloud()
251
- print(data)
252
  # Export buttons
253
- if st.session_state.data is not None:
254
  with st.sidebar:
255
- st.subheader('Download Content')
256
- csv_data = export_to_csv(data)
257
- st.download_button(label="CSV Format", data=csv_data, file_name='questions.csv', mime='text/csv')
258
 
259
- pdf_data = export_to_pdf(data)
260
- st.download_button(label="PDF Format", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
261
- if st.session_state.data is not None:
262
- st.markdown("You can download the data from the sidebar.")
263
 
264
-
265
- else:
266
- st.write("Please enter some text to generate questions.")
267
- print("********************************************************************************")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  import sense2vec
20
  from wordcloud import WordCloud
21
  import matplotlib.pyplot as plt
22
+ import json
23
+ import os
24
+ from sentence_transformers import SentenceTransformer, util
25
+ import textstat
26
+ import language_tool_python
27
+ from transformers import pipeline
28
+
29
  print("***************************************************************")
30
 
31
  st.set_page_config(
32
  page_title="Question Generator",
33
  initial_sidebar_state="collapsed",
34
  )
 
 
 
35
 
 
36
  # Initialize Wikipedia API with a user agent
37
  user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
38
  wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
39
 
40
+
41
  @st.cache_resource
42
  def load_model():
43
  model_name = "DevBM/t5-large-squad"
 
45
  tokenizer = T5Tokenizer.from_pretrained(model_name)
46
  return model, tokenizer
47
 
48
+ # Load spaCy and sense2vec models
49
+ @st.cache_resource
50
+ def load_nlp_models():
51
+ nlp = spacy.load("en_core_web_md")
52
+ s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
53
+ return nlp, s2v
54
+
55
+ # Load Quality Assurance Models
56
+ @st.cache_resource
57
+ def load_qa_models():
58
+ # Initialize BERT model for sentence similarity
59
+ similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
60
+
61
+ # Initialize language tool for grammar checking
62
+ language_tool = language_tool_python.LanguageTool('en-US')
63
+
64
+ return similarity_model, language_tool
65
+
66
+ nlp, s2v = load_nlp_models()
67
+ model, tokenizer = load_model()
68
+ similarity_model, language_tool = load_qa_models()
69
+
70
+ def save_feedback(question, answer,rating):
71
+ feedback_file = 'question_feedback.json'
72
+ if os.path.exists(feedback_file):
73
+ with open(feedback_file, 'r') as f:
74
+ feedback_data = json.load(f)
75
+ else:
76
+ feedback_data = []
77
+ tpl = {
78
+ 'question' : question,
79
+ 'answer' : answer,
80
+ 'rating' : rating,
81
+ }
82
+ # feedback_data[question] = rating
83
+ feedback_data.append(tpl)
84
+
85
+ with open(feedback_file, 'w') as f:
86
+ json.dump(feedback_data, f)
87
+
88
  # Function to extract keywords using combined techniques
89
  def extract_keywords(text, extract_all):
90
  doc = nlp(text)
 
206
 
207
  # Function to export questions to CSV
208
  def export_to_csv(data):
209
+ # df = pd.DataFrame(data, columns=["Context", "Answer", "Question", "Options"])
210
+ df = pd.DataFrame(data)
211
+ # csv = df.to_csv(index=False,encoding='utf-8')
212
+ csv = df.to_csv(index=False)
213
  return csv
214
 
215
  # Function to export questions to PDF
 
218
  pdf.add_page()
219
  pdf.set_font("Arial", size=12)
220
 
221
+ for item in data:
222
+ pdf.multi_cell(0, 10, f"Context: {item['context']}")
223
+ pdf.multi_cell(0, 10, f"Question: {item['question']}")
224
+ pdf.multi_cell(0, 10, f"Answer: {item['answer']}")
225
+ pdf.multi_cell(0, 10, f"Options: {', '.join(item['options'])}")
226
+ pdf.multi_cell(0, 10, f"Overall Score: {item['overall_score']:.2f}")
227
  pdf.ln(10)
228
 
229
+ return pdf.output(dest='S').encode('latin-1')
 
230
 
231
  def display_word_cloud(generated_questions):
232
  word_frequency = {}
 
241
  plt.axis('off')
242
  st.pyplot()
243
 
244
+
245
+ def assess_question_quality(context, question, answer):
246
+ # Assess relevance using cosine similarity
247
+ context_doc = nlp(context)
248
+ question_doc = nlp(question)
249
+ relevance_score = context_doc.similarity(question_doc)
250
+
251
+ # Assess complexity using token length (as a simple metric)
252
+ complexity_score = min(len(question_doc) / 20, 1) # Normalize to 0-1
253
+
254
+ # Assess grammatical correctness
255
+ errors = language_tool.check(question)
256
+ grammatical_correctness = 1 - (len(errors) / len(question_doc)) # Normalize to 0-1
257
+
258
+ # Calculate overall score (you can adjust weights as needed)
259
+ overall_score = (
260
+ 0.4 * relevance_score +
261
+ 0.3 * complexity_score +
262
+ 0.3 * grammatical_correctness
263
+ )
264
+
265
+ return overall_score, relevance_score, complexity_score, grammatical_correctness
266
+
267
+ def main():
268
+ # Streamlit interface
269
+ st.title(":blue[Question Generator System]")
270
+
271
+ # Initialize session state
272
+ if 'generated_questions' not in st.session_state:
273
+ st.session_state.generated_questions = []
274
+
275
+ text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
276
+
277
+ with st.sidebar:
278
+ st.subheader("Customization Options")
279
+ # Customization options
280
+ num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
281
+ context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
282
+ num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
283
+ with st.expander("Choose the Additional Elements to show"):
284
+ show_context = st.checkbox("Context",True)
285
+ show_answer = st.checkbox("Answer",True)
286
+ show_options = st.checkbox("Options",False)
287
+ show_entity_link = st.checkbox("Entity Link For Wikipedia",True)
288
+ show_qa_scores = st.checkbox("QA Score",False)
289
+ col1, col2 = st.columns(2)
290
+ with col1:
291
+ extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
292
+ with col2:
293
+ enable_feedback_mode = st.toggle("Enable Feedback Mode",False)
294
+
295
+ generate_questions_button = st.button("Generate Questions")
296
+ if generate_questions_button and text:
297
+ st.session_state.generated_questions = []
298
+ keywords = extract_keywords(text, extract_all_keywords)
299
  print(f"\n\nFinal Keywords in Main Function: {keywords}\n\n")
300
  keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
 
 
 
301
  for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
302
  if i >= num_questions:
303
  break
 
304
  question = generate_question(context, keyword, num_beams=num_beams)
305
+ options = generate_options(keyword,context)
306
+ overall_score, relevance_score, complexity_score, grammatical_correctness = assess_question_quality(context,question,keyword)
307
+ tpl = {
308
+ "question" : question,
309
+ "context" : context,
310
+ "answer" : keyword,
311
+ "options" : options,
312
+ "overall_score" : overall_score,
313
+ "relevance_score" : relevance_score,
314
+ "complexity_score" : complexity_score,
315
+ "grammatical_correctness" : grammatical_correctness,
316
+ }
317
+ st.session_state.generated_questions.append(tpl)
318
+
319
+ # Display generated questions
320
+ if st.session_state.generated_questions:
321
+ st.header("Generated Questions:",divider='blue')
322
+ for i, q in enumerate(st.session_state.generated_questions):
323
+ # with st.expander(f"Question {i+1}"):
324
+ st.subheader(body=f":orange[Q{i+1}:] {q['question']}")
325
 
326
  if show_context is True:
327
+ st.write(f"**Context:** {q['context']}")
328
  if show_answer is True:
329
+ st.write(f"**Answer:** {q['answer']}")
330
  if show_options is True:
331
  st.write(f"**Options:**")
332
+ for j, option in enumerate(q['options']):
333
  st.write(f"{chr(65+j)}. {option}")
334
  if show_entity_link is True:
335
+ linked_entity = entity_linking(q['answer'])
336
  if linked_entity:
337
  st.write(f"**Entity Link:** {linked_entity}")
338
+ if show_qa_scores is True:
339
+ st.write(f"**Overall Quality Score:** {q['overall_score']:.2f}")
340
+ st.write(f"**Relevance Score:** {q['relevance_score']:.2f}")
341
+ st.write(f"**Complexity Score:** {q['complexity_score']:.2f}")
342
+ st.write(f"**Grammatical Correctness:** {q['grammatical_correctness']:.2f}")
343
+
344
+ # q['context'] = st.text_area(f"Edit Context {i+1}:", value=q['context'], key=f"context_{i}")
345
+ if enable_feedback_mode:
346
+ q['question'] = st.text_input(f"Edit Question {i+1}:", value=q['question'], key=f"question_{i}")
347
+ q['rating'] = st.selectbox(f"Rate this question (1-5)", options=[1, 2, 3, 4, 5], key=f"rating_{i}")
348
+ if st.button(f"Submit Feedback for Question {i+1}", key=f"submit_{i}"):
349
+ save_feedback(q['question'], q['answer'], q['rating'])
350
+ st.success(f"Feedback submitted for Question {i+1}")
351
 
 
 
 
 
352
  # Export buttons
353
+ if st.session_state.generated_questions:
354
  with st.sidebar:
355
+ csv_data = export_to_csv(st.session_state.generated_questions)
356
+ st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
 
357
 
358
+ pdf_data = export_to_pdf(st.session_state.generated_questions)
359
+ st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
 
 
360
 
361
+ # View Feedback Statistics
362
+ with st.expander("View Feedback Statistics"):
363
+ feedback_file = 'question_feedback.json'
364
+ if os.path.exists(feedback_file):
365
+ with open(feedback_file, 'r') as f:
366
+ feedback_data = json.load(f)
367
+
368
+ st.subheader("Feedback Statistics")
369
+
370
+ # Calculate average rating
371
+ ratings = [feedback['rating'] for feedback in feedback_data]
372
+ avg_rating = sum(ratings) / len(ratings) if ratings else 0
373
+ st.write(f"Average Question Rating: {avg_rating:.2f}")
374
+
375
+ # Show distribution of ratings
376
+ rating_counts = {i: ratings.count(i) for i in range(1, 6)}
377
+ st.bar_chart(rating_counts)
378
+
379
+ # Show some highly rated questions
380
+ st.subheader("Highly Rated Questions")
381
+ sorted_feedback = sorted(feedback_data, key=lambda x: x['rating'], reverse=True)
382
+ top_questions = sorted_feedback[:5]
383
+ for feedback in top_questions:
384
+ st.write(f"Question: {feedback['question']}")
385
+ st.write(f"Answer: {feedback['answer']}")
386
+ st.write(f"Rating: {feedback['rating']}")
387
+ st.write("---")
388
+ else:
389
+ st.write("No feedback data available yet.")
390
+
391
+ print("********************************************************************************")
392
+
393
+ if __name__ == '__main__':
394
+ main()