Spaces:

DevBM
/

QGen

Running

App Files Community

DevBM

AneriThakkar committed on Jul 11

Commit

60121a2

•

1 Parent(s): 1b74db6

Using gliner for keyword extraction (#3)

Browse files

- Using gliner for keyword extraction (66e214b665ada0bf8ce14b58bb0d69527ea87e21)

Co-authored-by: Thakkar Aneri Pareshkumar <AneriThakkar@users.noreply.huggingface.co>

Files changed (1) hide show

app.py +25 -9

app.py CHANGED Viewed

@@ -41,6 +41,9 @@ from email.mime.base import MIMEBase
 from email.mime.application import MIMEApplication
 from email import encoders
 # '------------------'
 print("***************************************************************")
 st.set_page_config(
@@ -323,14 +326,24 @@ def segment_text(text, max_segment_length=700, batch_size=7):
 # Function to extract keywords using combined techniques
 def extract_keywords(text, extract_all):
     try:
         doc = nlp(text)
         spacy_keywords = set([ent.text for ent in doc.ents])
         spacy_entities = spacy_keywords
         print(f"\n\nSpacy Entities: {spacy_entities} \n\n")
-        # Use Only Spacy Entities
-        if extract_all is False:
-            return list(spacy_entities)
         # Use RAKE
         rake = Rake()
@@ -347,7 +360,7 @@ def extract_keywords(text, extract_all):
         print(f"\n\nTFIDF Entities: {tfidf_keywords} \n\n")
         # Combine all keywords
-        combined_keywords = rake_keywords.union(spacy_keywords).union(tfidf_keywords)
         return list(combined_keywords)
     except Exception as e:
@@ -427,9 +440,12 @@ def map_keywords_to_sentences(text, keywords, context_window_size):
         for i, sentence in enumerate(sentences):
             if keyword in sentence:
                 # Combine current sentence with surrounding sentences for context
-                start = max(0, i - context_window_size)
-                end = min(len(sentences), i + context_window_size + 1)
-                context = ' '.join(sentences[start:end])
                 if keyword not in keyword_sentence_mapping:
                     keyword_sentence_mapping[keyword] = context
                 else:
@@ -647,8 +663,8 @@ def main():
         text = clean_text(text)
     with st.expander("Show text"):
         st.write(text)
-    generate_questions_button = st.button("Generate Questions")
-    st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
     # if generate_questions_button:
     if generate_questions_button and text:

 from email.mime.application import MIMEApplication
 from email import encoders
 # '------------------'
+from gliner import GLiNER
+# -------------------
 print("***************************************************************")
 st.set_page_config(
 # Function to extract keywords using combined techniques
 def extract_keywords(text, extract_all):
     try:
+        gliner_model = GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5")
+        labels = ["person", "organization", "email", "Award", "Date", "Competitions", "Teams", "location", "percentage", "money"]
+        entities = gliner_model.predict_entities(text, labels, threshold=0.7)
+        gliner_keywords = list(set([ent["text"] for ent in entities]))
+        print(f"Gliner keywords:{gliner_keywords}")
+        # Use Only Gliner Entities
+        if extract_all is False:
+            return list(gliner_keywords)
         doc = nlp(text)
         spacy_keywords = set([ent.text for ent in doc.ents])
         spacy_entities = spacy_keywords
         print(f"\n\nSpacy Entities: {spacy_entities} \n\n")
+        #
+        # if extract_all is False:
+        #     return list(spacy_entities)
         # Use RAKE
         rake = Rake()
         print(f"\n\nTFIDF Entities: {tfidf_keywords} \n\n")
         # Combine all keywords
+        combined_keywords = rake_keywords.union(spacy_keywords).union(tfidf_keywords).union(gliner_keywords)
         return list(combined_keywords)
     except Exception as e:
         for i, sentence in enumerate(sentences):
             if keyword in sentence:
                 # Combine current sentence with surrounding sentences for context
+                # start = max(0, i - context_window_size)
+                # end = min(len(sentences), i + context_window_size + 1)
+                start = max(0,i - context_window_size)
+                context_sentenses = sentences[start:i+1]
+                context = ' '.join(context_sentenses)
+                # context = ' '.join(sentences[start:end])
                 if keyword not in keyword_sentence_mapping:
                     keyword_sentence_mapping[keyword] = context
                 else:
         text = clean_text(text)
     with st.expander("Show text"):
         st.write(text)
+    generate_questions_button = st.button("Generate Questions",help="This is the generate questions button")
+    # st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
     # if generate_questions_button:
     if generate_questions_button and text: