Update app.py
app.py CHANGED
@@ -12,6 +12,10 @@ nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('brown')
 from nltk.tokenize import sent_tokenize
+nltk.download('wordnet')
+from gensim.models import KeyedVectors
+from nltk.corpus import wordnet
+import random
 
 # Load spaCy model
 nlp = spacy.load("en_core_web_sm")
@@ -20,6 +24,9 @@ nlp = spacy.load("en_core_web_sm")
 user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
 
+# Load pre-trained word vectors (this may take a while)
+word_vectors = KeyedVectors.load_word2vec_format('vectors/GoogleNews-vectors-negative300.bin', binary=True)
+
 def load_model():
     model_name = "DevBM/t5-large-squad"
     model = T5ForConditionalGeneration.from_pretrained(model_name)
@@ -73,6 +80,54 @@ def map_keywords_to_sentences(text, keywords, context_window_size):
         keyword_sentence_mapping[keyword] += ' ' + context
     return keyword_sentence_mapping
 
+def get_similar_words(word, n=3):
+    try:
+        similar_words = word_vectors.most_similar(word, topn=n)
+        return [word for word, _ in similar_words]
+    except KeyError:
+        return []
+
+def get_synonyms(word, n=3):
+    synonyms = []
+    for syn in wordnet.synsets(word):
+        for lemma in syn.lemmas():
+            if lemma.name() != word and lemma.name() not in synonyms:
+                synonyms.append(lemma.name())
+            if len(synonyms) == n:
+                return synonyms
+    return synonyms
+
+def generate_options(answer, context, n=3):
+    options = [answer]
+
+    # Try to get similar words based on word embeddings
+    similar_words = get_similar_words(answer, n)
+    options.extend(similar_words)
+
+    # If we don't have enough options, try synonyms
+    if len(options) < n + 1:
+        synonyms = get_synonyms(answer, n - len(options) + 1)
+        options.extend(synonyms)
+
+    # If we still don't have enough options, extract other entities from the context
+    if len(options) < n + 1:
+        doc = nlp(context)
+        entities = [ent.text for ent in doc.ents if ent.text.lower() != answer.lower()]
+        options.extend(entities[:n - len(options) + 1])
+
+    # If we still need more options, add some random words from the context
+    if len(options) < n + 1:
+        context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
+        options.extend(random.sample(context_words, min(n - len(options) + 1, len(context_words))))
+
+    # Ensure we have the correct number of unique options
+    options = list(dict.fromkeys(options))[:n+1]
+
+    # Shuffle the options
+    random.shuffle(options)
+
+    return options
+
 # Function to perform entity linking using Wikipedia API
 @lru_cache(maxsize=128)
 def entity_linking(keyword):
@@ -137,9 +192,15 @@ if st.button("Generate Questions"):
                 break
            linked_entity = entity_linking(keyword)
            question = generate_question(context, keyword, num_beams=num_beams)
+           options = generate_options(keyword, context)
+
            st.write(f"**Context:** {context}")
            st.write(f"**Answer:** {keyword}")
            st.write(f"**Question:** {question}")
+           st.write(f"**Options:**")
+           for j, option in enumerate(options):
+               st.write(f"{chr(65+j)}. {option}")
+
            if linked_entity:
                st.write(f"**Entity Link:** {linked_entity}")
            st.write("---")
@@ -157,6 +218,9 @@ if st.button("Generate Questions"):
 
        pdf_data = export_to_pdf(data)
        st.download_button(label="PDF Format", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
+       if st.session_state.data is not None:
+           st.markdown("You can download the data from the sidebar.")
 
+
    else:
        st.write("Please enter some text to generate questions.")
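
A note on the new word-vector loading: the GoogleNews binary is several gigabytes and holds millions of vectors, so reading it in full can dominate the Space's startup time and memory. A minimal sketch, assuming the same vectors path as in the diff, that caps how many vectors gensim reads (the cap value is illustrative, not from the commit):

from gensim.models import KeyedVectors

# Assumption: same vectors path as in app.py; `limit` makes gensim read only the
# first N vectors, trading recall on rare words for faster startup and less RAM.
word_vectors = KeyedVectors.load_word2vec_format(
    'vectors/GoogleNews-vectors-negative300.bin',
    binary=True,
    limit=500_000,  # illustrative cap
)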
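
get_similar_words returns an empty list when the keyword is missing from the embedding vocabulary, which pushes generate_options onto the WordNet, entity, and random-word fallbacks. A hedged variant (the helper name and casing strategy are assumptions, not part of this commit) that retries a few casings first, since the GoogleNews vocabulary is case-sensitive:

# Hypothetical alternative to get_similar_words; relies on the module-level
# word_vectors loaded in app.py.
def get_similar_words_cased(word, n=3):
    for candidate in (word, word.lower(), word.capitalize()):
        try:
            return [w for w, _ in word_vectors.most_similar(candidate, topn=n)]
        except KeyError:
            continue  # this casing is out of vocabulary, try the next one
    return []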
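
The WordNet fallback returns lemma names verbatim, and multi-word lemmas use underscores (e.g. ice_cream), which read oddly as answer options. A small sketch of a display-friendly variant (get_synonyms_clean is a hypothetical name, not in the commit):

import nltk
from nltk.corpus import wordnet

nltk.download('wordnet')

# Hypothetical variant of get_synonyms that normalizes underscores and ignores case
def get_synonyms_clean(word, n=3):
    synonyms = []
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            name = lemma.name().replace('_', ' ')
            if name.lower() != word.lower() and name not in synonyms:
                synonyms.append(name)
            if len(synonyms) == n:
                return synonyms
    return synonyms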
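
For reference, a minimal sketch of how the generate_options output is consumed: the returned list already contains the correct answer plus up to n distractors, shuffled, so the display loop only needs to label them A, B, C, and so on. The example text is illustrative, and the call assumes word_vectors, nlp, and generate_options from app.py are in scope:

# Hypothetical usage outside Streamlit
context = "Paris is the capital of France and its largest city."
keyword = "Paris"

options = generate_options(keyword, context)
for j, option in enumerate(options):
    print(f"{chr(65 + j)}. {option}")  # A. ..., B. ..., C. ..., D. ...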