DevBM committed
Commit 9dee841
Parent(s): eb864f6

Add keyword extraction options and checkboxes to show additional elements

Files changed (1):
  1. app.py +62 -25
app.py CHANGED
@@ -17,8 +17,16 @@ from nltk.corpus import wordnet
 import random
 from sense2vec import Sense2Vec
 import sense2vec
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+print("***************************************************************")
+
+st.set_page_config(
+    page_title="Question Generator",
+    initial_sidebar_state="collapsed",
+)
 # Load spaCy model
-nlp = spacy.load("en_core_web_sm")
+nlp = spacy.load("en_core_web_md")
 # s2v = Sense2Vec.from_disk(self=Sense2Vec,path='s2v_old')
 
 s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
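A note on the new st.set_page_config block: Streamlit requires it to be the first Streamlit command a script executes, otherwise it raises StreamlitAPIException (the preceding print is harmless because it is not a Streamlit call). A minimal sketch of the required ordering; the st.title call is illustrative and not part of app.py:

import streamlit as st

# Must run before any other st.* command.
st.set_page_config(
    page_title="Question Generator",
    initial_sidebar_state="collapsed",
)

st.title("Question Generator")  # illustrative; all other st.* calls come after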
@@ -34,30 +42,35 @@ def load_model():
     return model, tokenizer
 
 # Function to extract keywords using combined techniques
-def extract_keywords(text):
+def extract_keywords(text, extract_all):
+    doc = nlp(text)
+    spacy_keywords = set([ent.text for ent in doc.ents])
+    spacy_entities = spacy_keywords
+    print(f"\n\nSpacy Entities: {spacy_entities} \n\n")
+
+    # Use Only Spacy Entities
+    if extract_all is False:
+        return list(spacy_entities)
+
     # Use RAKE
     rake = Rake()
     rake.extract_keywords_from_text(text)
     rake_keywords = set(rake.get_ranked_phrases())
-
+    print(f"\n\nRake Keywords: {rake_keywords} \n\n")
     # Use spaCy for NER and POS tagging
-    doc = nlp(text)
-    spacy_keywords = set([ent.text for ent in doc.ents])
     spacy_keywords.update([token.text for token in doc if token.pos_ in ["NOUN", "PROPN", "VERB", "ADJ"]])
-
+    print(f"\n\nSpacy Keywords: {spacy_keywords} \n\n")
     # Use TF-IDF
     vectorizer = TfidfVectorizer(stop_words='english')
     X = vectorizer.fit_transform([text])
     tfidf_keywords = set(vectorizer.get_feature_names_out())
-
+    print(f"\n\nTFIDF Entities: {tfidf_keywords} \n\n")
+
     # Combine all keywords
     combined_keywords = rake_keywords.union(spacy_keywords).union(tfidf_keywords)
 
     return list(combined_keywords)
 
-# Load spaCy model (medium-sized model with word vectors)
-nlp = spacy.load("en_core_web_md")
-
 def get_similar_words_sense2vec(word, n=3):
     # Try to find the word with its most likely part-of-speech
     word_with_pos = word + "|NOUN"
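For review context, a minimal sketch of what the two extract_all paths return, assuming rake_nltk, scikit-learn, NLTK stopword data, and the en_core_web_md model are installed as app.py's imports imply; the sample sentence and printed values are illustrative:

import spacy
from rake_nltk import Rake
from sklearn.feature_extraction.text import TfidfVectorizer

nlp = spacy.load("en_core_web_md")
text = "Marie Curie won the Nobel Prize in Physics in 1903."

# extract_all=False: named entities only
doc = nlp(text)
print([ent.text for ent in doc.ents])  # e.g. ['Marie Curie', 'the Nobel Prize in Physics', '1903']

# extract_all=True additionally merges RAKE phrases and TF-IDF terms
rake = Rake()
rake.extract_keywords_from_text(text)
print(rake.get_ranked_phrases())  # ranked multi-word phrases

vectorizer = TfidfVectorizer(stop_words='english')
vectorizer.fit_transform([text])
print(vectorizer.get_feature_names_out())  # unigram vocabulary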
@@ -140,7 +153,6 @@ def entity_linking(keyword):
     return None
 
 # Function to generate questions using beam search
-@st.cache_data
 def generate_question(context, answer, num_beams):
     input_text = f"<context> {context} <answer> {answer}"
     input_ids = tokenizer.encode(input_text, return_tensors='pt')
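Dropping @st.cache_data here means every Streamlit rerun regenerates each question from scratch. If caching is wanted back, a hedged sketch of one way to restore it (the wrapper name is hypothetical; st.cache_data keys on the hashable string/int arguments, and the model stays global as in app.py):

import streamlit as st

@st.cache_data(show_spinner=False)
def generate_question_cached(context: str, answer: str, num_beams: int) -> str:
    # Cached per unique (context, answer, num_beams) triple;
    # delegates to the existing generate_question.
    return generate_question(context, answer, num_beams)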
@@ -169,6 +181,19 @@ def export_to_pdf(data):
     # pdf.output("questions.pdf")
     return pdf.output(name='questions.pdf',dest='S').encode('latin1')
 
+def display_word_cloud(generated_questions):
+    word_frequency = {}
+    for question in generated_questions:
+        words = question.split()
+        for word in words:
+            word_frequency[word] = word_frequency.get(word, 0) + 1
+
+    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_frequency)
+    plt.figure(figsize=(10, 5))
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis('off')
+    st.pyplot()
+
 if 'data' not in st.session_state:
     st.session_state.data = None
 
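One caveat on display_word_cloud: recent Streamlit releases warn when st.pyplot() is called without an explicit figure. A hedged variant that passes the figure and folds the manual frequency loop into collections.Counter (same behavior, same WordCloud parameters):

from collections import Counter

import matplotlib.pyplot as plt
import streamlit as st
from wordcloud import WordCloud

def display_word_cloud(generated_questions):
    # Counter reproduces the manual word_frequency loop in one line.
    word_frequency = Counter(word for q in generated_questions for word in q.split())
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_frequency)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)  # explicit figure avoids the global-pyplot deprecation warning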
@@ -182,14 +207,21 @@ with st.sidebar:
     num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
     context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
     num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
-    question_complexity = st.selectbox("Select question complexity", ["Simple", "Intermediate", "Complex"])
+    with st.expander("Choose the Additional Elements to show"):
+        show_context = st.checkbox("Context", True)
+        show_answer = st.checkbox("Answer", True)
+        show_options = st.checkbox("Options", False)
+        show_entity_link = st.checkbox("Entity Link For Wikipedia", True)
+    extract_all_keywords = st.toggle("Extract max Keywords", value=False)
+
 if st.button("Generate Questions"):
     if text:
         model, tokenizer = load_model()
-        keywords = extract_keywords(text)
+        keywords = extract_keywords(text, extract_all_keywords)
+        print(f"\n\nFinal Keywords in Main Function: {keywords}\n\n")
         keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
 
-        st.subheader("Generated Questions:")
+        st.subheader("Generated Questions:", divider='blue')
         data = []
         for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
             if i >= num_questions:
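The st.toggle used for the new "Extract max Keywords" switch is a relatively recent Streamlit widget and is absent on older installs. A hedged compatibility sketch for the sidebar block; the checkbox fallback carries the same boolean semantics:

# Inside the sidebar block of app.py
if hasattr(st, "toggle"):
    extract_all_keywords = st.toggle("Extract max Keywords", value=False)
else:
    extract_all_keywords = st.checkbox("Extract max Keywords", value=False)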
@@ -197,22 +229,26 @@ if st.button("Generate Questions"):
             linked_entity = entity_linking(keyword)
             question = generate_question(context, keyword, num_beams=num_beams)
             options = generate_options(keyword, context)
+            st.subheader(body=f":orange[Q{i+1}:] {question}")
 
-            st.write(f"**Context:** {context}")
-            st.write(f"**Answer:** {keyword}")
-            st.write(f"**Question:** {question}")
-            st.write(f"**Options:**")
-            for j, option in enumerate(options):
-                st.write(f"{chr(65+j)}. {option}")
-
-            if linked_entity:
-                st.write(f"**Entity Link:** {linked_entity}")
+            if show_context is True:
+                st.write(f"**Context:** {context}")
+            if show_answer is True:
+                st.write(f"**Answer:** {keyword}")
+            if show_options is True:
+                st.write(f"**Options:**")
+                for j, option in enumerate(options):
+                    st.write(f"{chr(65+j)}. {option}")
+            if show_entity_link is True:
+                if linked_entity:
+                    st.write(f"**Entity Link:** {linked_entity}")
             st.write("---")
             data.append((context, keyword, question, options))
 
         # Add the data to session state
         st.session_state.data = data
-
+        # display_word_cloud()
+        print(data)
         # Export buttons
         if st.session_state.data is not None:
             with st.sidebar:
@@ -227,4 +263,5 @@ if st.button("Generate Questions"):
 
 
     else:
-        st.write("Please enter some text to generate questions.")
+        st.write("Please enter some text to generate questions.")
+        print("********************************************************************************")