Add keyword-extraction options and checkboxes for showing additional elements
Browse files
@@ -17,8 +17,16 @@ from nltk.corpus import wordnet
17 |
import random
18 |
from sense2vec import Sense2Vec
19 |
import sense2vec
20 |
# Load spaCy model
21 |
nlp = spacy.load("
22 |
# s2v = Sense2Vec.from_disk(self=Sense2Vec,path='s2v_old')
23 |
24 |
s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
@@ -34,30 +42,35 @@ def load_model():
34 |
return model, tokenizer
35 |
36 |
# Function to extract keywords using combined techniques
37 |
def extract_keywords(text):
    """Return candidate keywords for *text*, merged from three extractors.

    The result is the de-duplicated union of:

    * RAKE ranked phrases,
    * spaCy named entities plus every token tagged NOUN, PROPN, VERB or ADJ,
    * the vocabulary of a TF-IDF vectorizer fitted on this single document
      with English stop words removed.

    Args:
        text: The raw input text to analyse.

    Returns:
        A list of unique keyword strings. The order is unspecified because
        the keywords are collected in sets.
    """
    # RAKE only has ranked phrases to return after it has been given the text.
    rake = Rake()
    rake.extract_keywords_from_text(text)
    rake_keywords = set(rake.get_ranked_phrases())

    # spaCy: named entities first, then content-bearing tokens by POS tag.
    doc = nlp(text)
    content_pos = ("NOUN", "PROPN", "VERB", "ADJ")
    spacy_keywords = {ent.text for ent in doc.ents}
    spacy_keywords.update(token.text for token in doc if token.pos_ in content_pos)

    # TF-IDF: only the fitted vocabulary is needed, not the document matrix.
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        vectorizer.fit([text])
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary (e.g. the
        # text holds only stop words); the other extractors may still help.
        tfidf_keywords = set()
    else:
        tfidf_keywords = set(vectorizer.get_feature_names_out())

    # Combine all keywords
    return list(rake_keywords | spacy_keywords | tfidf_keywords)
57 |
58 |
# Load spaCy model (medium-sized model with word vectors)
59 |
nlp = spacy.load("en_core_web_md")
60 |
61 |
def get_similar_words_sense2vec(word, n=3):
62 |
# Try to find the word with its most likely part-of-speech
63 |
word_with_pos = word + "|NOUN"
@@ -140,7 +153,6 @@ def entity_linking(keyword):
140 |
return None
141 |
142 |
# Function to generate questions using beam search
143 |
144 |
def generate_question(context, answer, num_beams):
145 |
input_text = f"<context> {context} <answer> {answer}"
146 |
input_ids = tokenizer.encode(input_text, return_tensors='pt')
@@ -169,6 +181,19 @@ def export_to_pdf(data):
169 |
# pdf.output("questions.pdf")
170 |
return pdf.output(name='questions.pdf',dest='S').encode('latin1')
171 |
172 |
if 'data' not in st.session_state:
173 | = None
174 |
@@ -182,14 +207,21 @@ with st.sidebar:
182 |
num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
183 |
context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
184 |
num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
185 |
186 |
if st.button("Generate Questions"):
187 |
if text:
188 |
model, tokenizer = load_model()
189 |
keywords = extract_keywords(text)
190 |
keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
191 |
192 |
st.subheader("Generated Questions:")
193 |
data = []
194 |
for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
195 |
if i >= num_questions:
@@ -197,22 +229,26 @@ if st.button("Generate Questions"):
197 |
linked_entity = entity_linking(keyword)
198 |
question = generate_question(context, keyword, num_beams=num_beams)
199 |
options = generate_options(keyword, context)
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
data.append((context, keyword, question, options))
212 |
213 |
# Add the data to session state
214 | = data
215 |
216 |
# Export buttons
217 |
if is not None:
218 |
with st.sidebar:
@@ -227,4 +263,5 @@ if st.button("Generate Questions"):
227 |
228 |
229 |
230 |
st.write("Please enter some text to generate questions.")
17 |
import random
18 |
from sense2vec import Sense2Vec
19 |
import sense2vec
20 |
from wordcloud import WordCloud
21 |
import matplotlib.pyplot as plt
22 |
23 |
24 |
25 |
page_title="Question Generator",
26 |
27 |
28 |
# Load spaCy model
29 |
nlp = spacy.load("en_core_web_md")
30 |
# s2v = Sense2Vec.from_disk(self=Sense2Vec,path='s2v_old')
31 |
32 |
s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
42 |
return model, tokenizer
43 |
44 |
# Function to extract keywords using combined techniques
45 |
def extract_keywords(text, extract_all):
    """Return candidate keywords for *text*.

    Args:
        text: The raw input text to analyse.
        extract_all: When falsy, only spaCy named entities are returned.
            When truthy, the entities are merged with RAKE ranked phrases,
            spaCy tokens tagged NOUN, PROPN, VERB or ADJ, and the vocabulary
            of a TF-IDF vectorizer fitted on this single document with
            English stop words removed.

    Returns:
        A list of unique keyword strings. The order is unspecified because
        the keywords are collected in sets.

    Each intermediate keyword set is also printed to stdout for debugging.
    """
    doc = nlp(text)
    spacy_entities = {ent.text for ent in doc.ents}
    print(f"\n\nSpacy Entities: {spacy_entities} \n\n")

    # Entities-only mode. Truthiness is used rather than `is False` so that
    # other falsy values (None, 0) do not silently trigger full extraction.
    if not extract_all:
        return list(spacy_entities)

    # RAKE only has ranked phrases to return after it has been given the text.
    rake = Rake()
    rake.extract_keywords_from_text(text)
    rake_keywords = set(rake.get_ranked_phrases())
    print(f"\n\nRake Keywords: {rake_keywords} \n\n")

    # spaCy: extend the entities with content-bearing tokens by POS tag.
    content_pos = ("NOUN", "PROPN", "VERB", "ADJ")
    spacy_keywords = spacy_entities | {
        token.text for token in doc if token.pos_ in content_pos
    }
    print(f"\n\nSpacy Keywords: {spacy_keywords} \n\n")

    # TF-IDF: only the fitted vocabulary is needed, not the document matrix.
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        vectorizer.fit([text])
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary (e.g. the
        # text holds only stop words); the other extractors may still help.
        tfidf_keywords = set()
    else:
        tfidf_keywords = set(vectorizer.get_feature_names_out())
    print(f"\n\nTFIDF Entities: {tfidf_keywords} \n\n")

    # Combine all keywords
    return list(rake_keywords | spacy_keywords | tfidf_keywords)
73 |
74 |
def get_similar_words_sense2vec(word, n=3):
75 |
# Try to find the word with its most likely part-of-speech
76 |
word_with_pos = word + "|NOUN"
153 |
return None
154 |
155 |
# Function to generate questions using beam search
156 |
def generate_question(context, answer, num_beams):
157 |
input_text = f"<context> {context} <answer> {answer}"
158 |
input_ids = tokenizer.encode(input_text, return_tensors='pt')
181 |
# pdf.output("questions.pdf")
182 |
return pdf.output(name='questions.pdf',dest='S').encode('latin1')
183 |
184 |
def display_word_cloud(generated_questions):
    """Draw a word cloud of the words used across *generated_questions*.

    Each question is split on whitespace and the occurrences of every
    resulting word are counted; the counts drive the word cloud, which is
    drawn onto a new Matplotlib figure.

    Args:
        generated_questions: An iterable of question strings.

    Returns:
        None. When no words are found, nothing is drawn.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from collections import Counter

    word_frequency = Counter()
    for question in generated_questions:
        word_frequency.update(question.split())

    if not word_frequency:
        # WordCloud.generate_from_frequencies raises ValueError when it is
        # given no words, so there is nothing to draw.
        return

    wordcloud = WordCloud(
        width=800, height=400, background_color='white'
    ).generate_from_frequencies(word_frequency)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    # NOTE(review): nothing visible in this block hands the figure to
    # Streamlit (e.g. via st.pyplot) - confirm that happens elsewhere.
194 |
195 |
196 |
197 |
if 'data' not in st.session_state:
198 | = None
199 |
207 |
num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
208 |
context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
209 |
num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
210 |
with st.expander("Choose the Additional Elements to show"):
211 |
show_context = st.checkbox("Context",True)
212 |
show_answer = st.checkbox("Answer",True)
213 |
show_options = st.checkbox("Options",False)
214 |
show_entity_link = st.checkbox("Enitity Link For Wikipedia",True)
215 |
extract_all_keywords = st.toggle("Extract max Keywords",value=False)
216 |
217 |
if st.button("Generate Questions"):
218 |
if text:
219 |
model, tokenizer = load_model()
220 |
keywords = extract_keywords(text,extract_all_keywords)
221 |
print(f"\n\nFinal Keywords in Main Function: {keywords}\n\n")
222 |
keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
223 |
224 |
st.subheader("Generated Questions:",divider='blue')
225 |
data = []
226 |
for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
227 |
if i >= num_questions:
229 |
linked_entity = entity_linking(keyword)
230 |
question = generate_question(context, keyword, num_beams=num_beams)
231 |
options = generate_options(keyword, context)
232 |
st.subheader(body=f":orange[Q{i+1}:] {question}")
233 |
234 |
if show_context is True:
235 |
st.write(f"**Context:** {context}")
236 |
if show_answer is True:
237 |
st.write(f"**Answer:** {keyword}")
238 |
if show_options is True:
239 |
240 |
for j, option in enumerate(options):
241 |
st.write(f"{chr(65+j)}. {option}")
242 |
if show_entity_link is True:
243 |
if linked_entity:
244 |
st.write(f"**Entity Link:** {linked_entity}")
245 |
246 |
data.append((context, keyword, question, options))
247 |
248 |
# Add the data to session state
249 | = data
250 |
# display_word_cloud()
251 |
252 |
# Export buttons
253 |
if is not None:
254 |
with st.sidebar:
263 |
264 |
265 |
266 |
st.write("Please enter some text to generate questions.")
267 |