DevBM committed on
Commit
9e75c6e
1 Parent(s): 65b7df9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -9
app.py CHANGED
@@ -36,8 +36,8 @@ st.set_page_config(
36
  "About" : "#Hi this our project."
37
  }
38
  )
39
- # st.set_option(deprecation.showPyplotGlobalUse=False)
40
- # st.set_option('base','dark')
41
  # Initialize Wikipedia API with a user agent
42
  user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
43
  wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
@@ -65,8 +65,8 @@ def set_state(session_id, key, value):
65
  st.session_state.session_states[session_id][key] = value
66
 
67
  @st.cache_resource
68
- def load_model():
69
- model_name = "DevBM/t5-large-squad"
70
  model = T5ForConditionalGeneration.from_pretrained(model_name)
71
  tokenizer = T5Tokenizer.from_pretrained(model_name)
72
  return model, tokenizer
@@ -88,10 +88,48 @@ def load_qa_models():
88
  return similarity_model, spell
89
 
90
  nlp, s2v = load_nlp_models()
91
- model, tokenizer = load_model()
92
  similarity_model, spell = load_qa_models()
93
  context_model = similarity_model
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  def get_pdf_text(pdf_file):
96
  doc = pymupdf.open(stream=pdf_file.read(), filetype="pdf")
97
  text = ""
@@ -124,7 +162,7 @@ def clean_text(text):
124
  return text
125
 
126
  # Function to create text chunks
127
- def segment_text(text, max_segment_length=1000):
128
  """Segment the text into smaller chunks."""
129
  sentences = sent_tokenize(text)
130
  segments = []
@@ -268,7 +306,7 @@ def entity_linking(keyword):
268
  def generate_question(context, answer, num_beams):
269
  input_text = f"<context> {context} <answer> {answer}"
270
  input_ids = tokenizer.encode(input_text, return_tensors='pt')
271
- outputs = model.generate(input_ids, num_beams=num_beams, early_stopping=True)
272
  question = tokenizer.decode(outputs[0], skip_special_tokens=True)
273
  return question
274
 
@@ -337,8 +375,10 @@ def main():
337
  st.title(":blue[Question Generator System]")
338
  session_id = get_session_id()
339
  state = initialize_state(session_id)
340
-
341
  with st.sidebar:
 
 
 
342
  st.subheader("Customization Options")
343
  # Customization options
344
  input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
@@ -356,7 +396,10 @@ def main():
356
  extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
357
  with col2:
358
  enable_feedback_mode = st.toggle("Enable Feedback Mode",False)
 
359
  # set_state(session_id, 'generated_questions', state['generated_questions'])
 
 
360
  text = None
361
  if input_type == "Text Input":
362
  text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
@@ -445,12 +488,13 @@ def main():
445
  # Export buttons
446
  # if st.session_state.generated_questions:
447
  if state['generated_questions']:
448
- with st.sidebar:
449
  csv_data = export_to_csv(state['generated_questions'])
450
  st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
451
 
452
  pdf_data = export_to_pdf(state['generated_questions'])
453
  st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
 
454
  with st.expander("View Visualizations"):
455
  questions = [tpl['question'] for tpl in state['generated_questions']]
456
  overall_scores = [tpl['overall_score'] for tpl in state['generated_questions']]
 
36
  "About" : "#Hi this our project."
37
  }
38
  )
39
+
40
+ st.set_option('deprecation.showPyplotGlobalUse',False)
41
  # Initialize Wikipedia API with a user agent
42
  user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
43
  wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
 
65
  st.session_state.session_states[session_id][key] = value
66
 
67
  @st.cache_resource
68
+ def load_model(modelname):
69
+ model_name = modelname
70
  model = T5ForConditionalGeneration.from_pretrained(model_name)
71
  tokenizer = T5Tokenizer.from_pretrained(model_name)
72
  return model, tokenizer
 
88
  return similarity_model, spell
89
 
90
  nlp, s2v = load_nlp_models()
91
+ model, tokenizer = load_model('DevBM/t5-large-small')
92
  similarity_model, spell = load_qa_models()
93
  context_model = similarity_model
94
 
95
+ # Info Section
96
+ def display_info():
97
+ st.sidebar.title("Information")
98
+ st.sidebar.markdown("""
99
+ ### Question Generator System
100
+ This system is designed to generate questions based on the provided context. It uses various NLP techniques and models to:
101
+ - Extract keywords from the text
102
+ - Map keywords to sentences
103
+ - Generate questions
104
+ - Provide multiple choice options
105
+ - Assess the quality of generated questions
106
+
107
+ #### Key Features:
108
+ - **Keyword Extraction:** Combines RAKE, TF-IDF, and spaCy for comprehensive keyword extraction.
109
+ - **Question Generation:** Utilizes a pre-trained T5 model for generating questions.
110
+ - **Options Generation:** Creates contextually relevant multiple-choice options.
111
+ - **Question Assessment:** Scores questions based on relevance, complexity, and spelling correctness.
112
+ - **Feedback Collection:** Allows users to rate the generated questions and provides statistics on feedback.
113
+
114
+ #### Customization Options:
115
+ - Number of beams for question generation
116
+ - Context window size for mapping keywords to sentences
117
+ - Number of questions to generate
118
+ - Additional display elements (context, answer, options, entity link, QA scores)
119
+
120
+ #### Outputs:
121
+ - Generated questions with multiple-choice options
122
+ - Download options for CSV and PDF formats
123
+ - Visualization of overall scores
124
+
125
+ """)
126
+
127
+ # Text Preprocessing Function
128
+ def preprocess_text(text):
129
+ # Remove newlines and extra spaces
130
+ text = re.sub(r'\s+', ' ', text)
131
+ return text
132
+
133
  def get_pdf_text(pdf_file):
134
  doc = pymupdf.open(stream=pdf_file.read(), filetype="pdf")
135
  text = ""
 
162
  return text
163
 
164
  # Function to create text chunks
165
+ def segment_text(text, max_segment_length=500):
166
  """Segment the text into smaller chunks."""
167
  sentences = sent_tokenize(text)
168
  segments = []
 
306
  def generate_question(context, answer, num_beams):
307
  input_text = f"<context> {context} <answer> {answer}"
308
  input_ids = tokenizer.encode(input_text, return_tensors='pt')
309
+ outputs = model.generate(input_ids, num_beams=num_beams, early_stopping=True, max_length=150)
310
  question = tokenizer.decode(outputs[0], skip_special_tokens=True)
311
  return question
312
 
 
375
  st.title(":blue[Question Generator System]")
376
  session_id = get_session_id()
377
  state = initialize_state(session_id)
 
378
  with st.sidebar:
379
+ show_info = st.toggle('Show Info',True)
380
+ if show_info:
381
+ display_info()
382
  st.subheader("Customization Options")
383
  # Customization options
384
  input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
 
396
  extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
397
  with col2:
398
  enable_feedback_mode = st.toggle("Enable Feedback Mode",False)
399
+ use_t5_small = st.toggle("Use T5-Small",False)
400
  # set_state(session_id, 'generated_questions', state['generated_questions'])
401
+ if use_t5_small is True:
402
+ model, tokenizer = load_model('AneriThakkar/flan-t5-small-finetuned')
403
  text = None
404
  if input_type == "Text Input":
405
  text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
 
488
  # Export buttons
489
  # if st.session_state.generated_questions:
490
  if state['generated_questions']:
491
+ with st.sidebar:
492
  csv_data = export_to_csv(state['generated_questions'])
493
  st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
494
 
495
  pdf_data = export_to_pdf(state['generated_questions'])
496
  st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
497
+
498
  with st.expander("View Visualizations"):
499
  questions = [tpl['question'] for tpl in state['generated_questions']]
500
  overall_scores = [tpl['overall_score'] for tpl in state['generated_questions']]