Spaces:

DevBM
/

QGen

Running

App Files Community

DevBM committed on Jul 4

Commit

9e75c6e

•

1 Parent(s): 65b7df9

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -9

app.py CHANGED Viewed

@@ -36,8 +36,8 @@ st.set_page_config(
         "About" : "#Hi this our project."
     }
 )
-# st.set_option(deprecation.showPyplotGlobalUse=False)
-# st.set_option('base','dark')
 # Initialize Wikipedia API with a user agent
 user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
@@ -65,8 +65,8 @@ def set_state(session_id, key, value):
     st.session_state.session_states[session_id][key] = value
 @st.cache_resource
-def load_model():
-    model_name = "DevBM/t5-large-squad"
     model = T5ForConditionalGeneration.from_pretrained(model_name)
     tokenizer = T5Tokenizer.from_pretrained(model_name)
     return model, tokenizer
@@ -88,10 +88,48 @@ def load_qa_models():
     return similarity_model, spell
 nlp, s2v = load_nlp_models()
-model, tokenizer = load_model()
 similarity_model, spell = load_qa_models()
 context_model = similarity_model
 def get_pdf_text(pdf_file):
     doc = pymupdf.open(stream=pdf_file.read(), filetype="pdf")
     text = ""
@@ -124,7 +162,7 @@ def clean_text(text):
     return text
 # Function to create text chunks
-def segment_text(text, max_segment_length=1000):
     """Segment the text into smaller chunks."""
     sentences = sent_tokenize(text)
     segments = []
@@ -268,7 +306,7 @@ def entity_linking(keyword):
 def generate_question(context, answer, num_beams):
     input_text = f"<context> {context} <answer> {answer}"
     input_ids = tokenizer.encode(input_text, return_tensors='pt')
-    outputs = model.generate(input_ids, num_beams=num_beams, early_stopping=True)
     question = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return question
@@ -337,8 +375,10 @@ def main():
     st.title(":blue[Question Generator System]")
     session_id = get_session_id()
     state = initialize_state(session_id)
     with st.sidebar:
         st.subheader("Customization Options")
         # Customization options
         input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
@@ -356,7 +396,10 @@ def main():
             extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
         with col2:
             enable_feedback_mode = st.toggle("Enable Feedback Mode",False)
     # set_state(session_id, 'generated_questions', state['generated_questions'])
     text = None
     if input_type == "Text Input":
         text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
@@ -445,12 +488,13 @@ def main():
         # Export buttons
         # if st.session_state.generated_questions:
         if state['generated_questions']:
-            with st.sidebar:
                 csv_data = export_to_csv(state['generated_questions'])
                 st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
                 pdf_data = export_to_pdf(state['generated_questions'])
                 st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
             with st.expander("View Visualizations"):
                 questions = [tpl['question'] for tpl in state['generated_questions']]
                 overall_scores = [tpl['overall_score'] for tpl in state['generated_questions']]

         "About" : "#Hi this our project."
     }
 )
+st.set_option('deprecation.showPyplotGlobalUse',False)
 # Initialize Wikipedia API with a user agent
 user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
     st.session_state.session_states[session_id][key] = value
 @st.cache_resource
+def load_model(modelname):
+    model_name = modelname
     model = T5ForConditionalGeneration.from_pretrained(model_name)
     tokenizer = T5Tokenizer.from_pretrained(model_name)
     return model, tokenizer
     return similarity_model, spell
 nlp, s2v = load_nlp_models()
+model, tokenizer = load_model('DevBM/t5-large-small')
 similarity_model, spell = load_qa_models()
 context_model = similarity_model
+# Info Section
+def display_info():
+    st.sidebar.title("Information")
+    st.sidebar.markdown("""
+        ### Question Generator System
+        This system is designed to generate questions based on the provided context. It uses various NLP techniques and models to:
+        - Extract keywords from the text
+        - Map keywords to sentences
+        - Generate questions
+        - Provide multiple choice options
+        - Assess the quality of generated questions
+        #### Key Features:
+        - **Keyword Extraction:** Combines RAKE, TF-IDF, and spaCy for comprehensive keyword extraction.
+        - **Question Generation:** Utilizes a pre-trained T5 model for generating questions.
+        - **Options Generation:** Creates contextually relevant multiple-choice options.
+        - **Question Assessment:** Scores questions based on relevance, complexity, and spelling correctness.
+        - **Feedback Collection:** Allows users to rate the generated questions and provides statistics on feedback.
+        #### Customization Options:
+        - Number of beams for question generation
+        - Context window size for mapping keywords to sentences
+        - Number of questions to generate
+        - Additional display elements (context, answer, options, entity link, QA scores)
+        #### Outputs:
+        - Generated questions with multiple-choice options
+        - Download options for CSV and PDF formats
+        - Visualization of overall scores
+    """)
+# Text Preprocessing Function
+def preprocess_text(text):
+    # Remove newlines and extra spaces
+    text = re.sub(r'\s+', ' ', text)
+    return text
 def get_pdf_text(pdf_file):
     doc = pymupdf.open(stream=pdf_file.read(), filetype="pdf")
     text = ""
     return text
 # Function to create text chunks
+def segment_text(text, max_segment_length=500):
     """Segment the text into smaller chunks."""
     sentences = sent_tokenize(text)
     segments = []
 def generate_question(context, answer, num_beams):
     input_text = f"<context> {context} <answer> {answer}"
     input_ids = tokenizer.encode(input_text, return_tensors='pt')
+    outputs = model.generate(input_ids, num_beams=num_beams, early_stopping=True, max_length=150)
     question = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return question
     st.title(":blue[Question Generator System]")
     session_id = get_session_id()
     state = initialize_state(session_id)
     with st.sidebar:
+        show_info = st.toggle('Show Info',True)
+        if show_info:
+            display_info()
         st.subheader("Customization Options")
         # Customization options
         input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
             extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
         with col2:
             enable_feedback_mode = st.toggle("Enable Feedback Mode",False)
+        use_t5_small = st.toggle("Use T5-Small",False)
     # set_state(session_id, 'generated_questions', state['generated_questions'])
+    if use_t5_small is True:
+        model, tokenizer = load_model('AneriThakkar/flan-t5-small-finetuned')
     text = None
     if input_type == "Text Input":
         text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
         # Export buttons
         # if st.session_state.generated_questions:
         if state['generated_questions']:
+            with st.sidebar:
                 csv_data = export_to_csv(state['generated_questions'])
                 st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
                 pdf_data = export_to_pdf(state['generated_questions'])
                 st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
             with st.expander("View Visualizations"):
                 questions = [tpl['question'] for tpl in state['generated_questions']]
                 overall_scores = [tpl['overall_score'] for tpl in state['generated_questions']]