DevBM committed
Commit 068e84d
1 Parent(s): 0ba53c8

Update app.py

Files changed (1)
  1. app.py +64 -0
app.py CHANGED
 
@@ -12,6 +12,10 @@ nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('brown')
 from nltk.tokenize import sent_tokenize
+nltk.download('wordnet')
+from gensim.models import KeyedVectors
+from nltk.corpus import wordnet
+import random
 
 # Load spaCy model
 nlp = spacy.load("en_core_web_sm")
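The synonym fallback added in this commit relies on WordNet via NLTK. A minimal sketch of the lookup the new get_synonyms helper performs, assuming the 'wordnet' corpus has been downloaded (illustrative, not part of the commit):

import nltk
from nltk.corpus import wordnet

nltk.download('wordnet')  # one-time corpus download

# All lemma names across the synsets of a word; get_synonyms filters out the
# word itself and stops after n unique entries.
print([lemma.name() for syn in wordnet.synsets("capital") for lemma in syn.lemmas()][:5])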
 
@@ -20,6 +24,9 @@ nlp = spacy.load("en_core_web_sm")
 user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
 
+# Load pre-trained word vectors (this may take a while)
+word_vectors = KeyedVectors.load_word2vec_format('vectors/GoogleNews-vectors-negative300.bin', binary=True)
+
 def load_model():
     model_name = "DevBM/t5-large-squad"
     model = T5ForConditionalGeneration.from_pretrained(model_name)
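Loading the full GoogleNews binary eagerly at module level runs again on every Streamlit rerun and needs several gigabytes of RAM. A hedged sketch of one way to contain that, using gensim's limit parameter and Streamlit's resource cache; the wrapper name, path, and limit value are assumptions, not part of the commit:

import streamlit as st
from gensim.models import KeyedVectors

@st.cache_resource  # load once per process and reuse across reruns
def load_word_vectors(path='vectors/GoogleNews-vectors-negative300.bin', limit=500_000):
    # limit keeps only the most frequent vectors, cutting load time and memory
    return KeyedVectors.load_word2vec_format(path, binary=True, limit=limit)

word_vectors = load_word_vectors()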
 
@@ -73,6 +80,54 @@ def map_keywords_to_sentences(text, keywords, context_window_size):
         keyword_sentence_mapping[keyword] += ' ' + context
     return keyword_sentence_mapping
 
+def get_similar_words(word, n=3):
+    try:
+        similar_words = word_vectors.most_similar(word, topn=n)
+        return [word for word, _ in similar_words]
+    except KeyError:
+        return []
+
+def get_synonyms(word, n=3):
+    synonyms = []
+    for syn in wordnet.synsets(word):
+        for lemma in syn.lemmas():
+            if lemma.name() != word and lemma.name() not in synonyms:
+                synonyms.append(lemma.name())
+            if len(synonyms) == n:
+                return synonyms
+    return synonyms
+
+def generate_options(answer, context, n=3):
+    options = [answer]
+
+    # Try to get similar words based on word embeddings
+    similar_words = get_similar_words(answer, n)
+    options.extend(similar_words)
+
+    # If we don't have enough options, try synonyms
+    if len(options) < n + 1:
+        synonyms = get_synonyms(answer, n - len(options) + 1)
+        options.extend(synonyms)
+
+    # If we still don't have enough options, extract other entities from the context
+    if len(options) < n + 1:
+        doc = nlp(context)
+        entities = [ent.text for ent in doc.ents if ent.text.lower() != answer.lower()]
+        options.extend(entities[:n - len(options) + 1])
+
+    # If we still need more options, add some random words from the context
+    if len(options) < n + 1:
+        context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
+        options.extend(random.sample(context_words, min(n - len(options) + 1, len(context_words))))
+
+    # Ensure we have the correct number of unique options
+    options = list(dict.fromkeys(options))[:n+1]
+
+    # Shuffle the options
+    random.shuffle(options)
+
+    return options
+
 # Function to perform entity linking using Wikipedia API
 @lru_cache(maxsize=128)
 def entity_linking(keyword):
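A usage sketch of the fallback chain above (word-embedding neighbours, then WordNet synonyms, then context entities, then random context words). The text and the printed distractors are illustrative only; actual output depends on the loaded vectors and is shuffled:

# Assumes word_vectors, nlp and the helpers above are already loaded.
context = "Paris is the capital of France and its largest city."
options = generate_options("Paris", context, n=3)
print(options)  # e.g. ['London', 'Paris', 'Berlin', 'France'] -- four shuffled options, one correct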
 
@@ -137,9 +192,15 @@ if st.button("Generate Questions"):
                 break
             linked_entity = entity_linking(keyword)
             question = generate_question(context, keyword, num_beams=num_beams)
+            options = generate_options(keyword, context)
+
             st.write(f"**Context:** {context}")
             st.write(f"**Answer:** {keyword}")
             st.write(f"**Question:** {question}")
+            st.write(f"**Options:**")
+            for j, option in enumerate(options):
+                st.write(f"{chr(65+j)}. {option}")
+
             if linked_entity:
                 st.write(f"**Entity Link:** {linked_entity}")
             st.write("---")
 
@@ -157,6 +218,9 @@ if st.button("Generate Questions"):
 
         pdf_data = export_to_pdf(data)
         st.download_button(label="PDF Format", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
+        if st.session_state.data is not None:
+            st.markdown("You can download the data from the sidebar.")
 
+
     else:
         st.write("Please enter some text to generate questions.")