Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -36,8 +36,8 @@ st.set_page_config(
|
|
36 |
"About" : "#Hi this our project."
|
37 |
}
|
38 |
)
|
39 |
-
|
40 |
-
|
41 |
# Initialize Wikipedia API with a user agent
|
42 |
user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
|
43 |
wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
|
@@ -65,8 +65,8 @@ def set_state(session_id, key, value):
|
|
65 |
st.session_state.session_states[session_id][key] = value
|
66 |
|
67 |
@st.cache_resource
|
68 |
-
def load_model():
|
69 |
-
model_name =
|
70 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
71 |
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
72 |
return model, tokenizer
|
@@ -88,10 +88,48 @@ def load_qa_models():
|
|
88 |
return similarity_model, spell
|
89 |
|
90 |
nlp, s2v = load_nlp_models()
|
91 |
-
model, tokenizer = load_model()
|
92 |
similarity_model, spell = load_qa_models()
|
93 |
context_model = similarity_model
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
def get_pdf_text(pdf_file):
|
96 |
doc = pymupdf.open(stream=pdf_file.read(), filetype="pdf")
|
97 |
text = ""
|
@@ -124,7 +162,7 @@ def clean_text(text):
|
|
124 |
return text
|
125 |
|
126 |
# Function to create text chunks
|
127 |
-
def segment_text(text, max_segment_length=
|
128 |
"""Segment the text into smaller chunks."""
|
129 |
sentences = sent_tokenize(text)
|
130 |
segments = []
|
@@ -268,7 +306,7 @@ def entity_linking(keyword):
|
|
268 |
def generate_question(context, answer, num_beams):
|
269 |
input_text = f"<context> {context} <answer> {answer}"
|
270 |
input_ids = tokenizer.encode(input_text, return_tensors='pt')
|
271 |
-
outputs = model.generate(input_ids, num_beams=num_beams, early_stopping=True)
|
272 |
question = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
273 |
return question
|
274 |
|
@@ -337,8 +375,10 @@ def main():
|
|
337 |
st.title(":blue[Question Generator System]")
|
338 |
session_id = get_session_id()
|
339 |
state = initialize_state(session_id)
|
340 |
-
|
341 |
with st.sidebar:
|
|
|
|
|
|
|
342 |
st.subheader("Customization Options")
|
343 |
# Customization options
|
344 |
input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
|
@@ -356,7 +396,10 @@ def main():
|
|
356 |
extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
|
357 |
with col2:
|
358 |
enable_feedback_mode = st.toggle("Enable Feedback Mode",False)
|
|
|
359 |
# set_state(session_id, 'generated_questions', state['generated_questions'])
|
|
|
|
|
360 |
text = None
|
361 |
if input_type == "Text Input":
|
362 |
text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
|
@@ -445,12 +488,13 @@ def main():
|
|
445 |
# Export buttons
|
446 |
# if st.session_state.generated_questions:
|
447 |
if state['generated_questions']:
|
448 |
-
with st.sidebar:
|
449 |
csv_data = export_to_csv(state['generated_questions'])
|
450 |
st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
|
451 |
|
452 |
pdf_data = export_to_pdf(state['generated_questions'])
|
453 |
st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
|
|
|
454 |
with st.expander("View Visualizations"):
|
455 |
questions = [tpl['question'] for tpl in state['generated_questions']]
|
456 |
overall_scores = [tpl['overall_score'] for tpl in state['generated_questions']]
|
|
|
36 |
"About" : "#Hi this our project."
|
37 |
}
|
38 |
)
|
39 |
+
|
40 |
+
st.set_option('deprecation.showPyplotGlobalUse',False)
|
41 |
# Initialize Wikipedia API with a user agent
|
42 |
user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
|
43 |
wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
|
|
|
65 |
st.session_state.session_states[session_id][key] = value
|
66 |
|
67 |
@st.cache_resource
def load_model(modelname):
    """Load a T5 model together with its tokenizer.

    Args:
        modelname: Identifier handed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Tuple elements are evaluated left to right, so the model is still
    # loaded before the tokenizer.
    return (
        T5ForConditionalGeneration.from_pretrained(modelname),
        T5Tokenizer.from_pretrained(modelname),
    )
|
|
|
88 |
return similarity_model, spell
|
89 |
|
90 |
nlp, s2v = load_nlp_models()
|
91 |
+
model, tokenizer = load_model('DevBM/t5-large-small')
|
92 |
similarity_model, spell = load_qa_models()
|
93 |
context_model = similarity_model
|
94 |
|
95 |
+
# Info Section
|
96 |
+
def display_info():
    """Render the "Information" panel in the Streamlit sidebar.

    Writes a title followed by a Markdown overview of the system (purpose,
    key features, customization options and outputs). Returns nothing.
    """
    # The Markdown body is kept flush-left so the text handed to Streamlit
    # is exactly the text written here.
    info_markdown = """
### Question Generator System
This system is designed to generate questions based on the provided context. It uses various NLP techniques and models to:
- Extract keywords from the text
- Map keywords to sentences
- Generate questions
- Provide multiple choice options
- Assess the quality of generated questions

#### Key Features:
- **Keyword Extraction:** Combines RAKE, TF-IDF, and spaCy for comprehensive keyword extraction.
- **Question Generation:** Utilizes a pre-trained T5 model for generating questions.
- **Options Generation:** Creates contextually relevant multiple-choice options.
- **Question Assessment:** Scores questions based on relevance, complexity, and spelling correctness.
- **Feedback Collection:** Allows users to rate the generated questions and provides statistics on feedback.

#### Customization Options:
- Number of beams for question generation
- Context window size for mapping keywords to sentences
- Number of questions to generate
- Additional display elements (context, answer, options, entity link, QA scores)

#### Outputs:
- Generated questions with multiple-choice options
- Download options for CSV and PDF formats
- Visualization of overall scores

"""
    sidebar = st.sidebar
    sidebar.title("Information")
    sidebar.markdown(info_markdown)
|
126 |
+
|
127 |
+
# Text Preprocessing Function
|
128 |
+
def preprocess_text(text):
    """Collapse all whitespace in ``text`` to single spaces.

    Every run of whitespace (spaces, tabs, newlines) becomes one space, and
    whitespace at the start and end of the text is removed so the result
    never begins or ends with a stray space.

    Args:
        text: The raw text to normalize. ``None`` or an empty string is
            treated as "no text".

    Returns:
        The normalized string, or ``""`` when there is no text.
    """
    if not text:
        return ""
    # Collapse first, then trim: a leading/trailing whitespace run would
    # otherwise survive as a single space.
    return re.sub(r'\s+', ' ', text).strip()
|
132 |
+
|
133 |
def get_pdf_text(pdf_file):
|
134 |
doc = pymupdf.open(stream=pdf_file.read(), filetype="pdf")
|
135 |
text = ""
|
|
|
162 |
return text
|
163 |
|
164 |
# Function to create text chunks
|
165 |
+
def segment_text(text, max_segment_length=500):
|
166 |
"""Segment the text into smaller chunks."""
|
167 |
sentences = sent_tokenize(text)
|
168 |
segments = []
|
|
|
306 |
def generate_question(context, answer, num_beams):
    """Generate a question whose answer is ``answer`` within ``context``.

    Uses the ``tokenizer`` and ``model`` names defined outside this function.

    Args:
        context: Passage placed after the ``<context>`` tag of the prompt.
        answer: Target answer placed after the ``<answer>`` tag.
        num_beams: Beam count forwarded to ``model.generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    prompt = f"<context> {context} <answer> {answer}"
    encoded_prompt = tokenizer.encode(prompt, return_tensors='pt')
    generation_options = {
        "num_beams": num_beams,
        "early_stopping": True,
        "max_length": 150,
    }
    generated = model.generate(encoded_prompt, **generation_options)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
312 |
|
|
|
375 |
st.title(":blue[Question Generator System]")
|
376 |
session_id = get_session_id()
|
377 |
state = initialize_state(session_id)
|
|
|
378 |
with st.sidebar:
|
379 |
+
show_info = st.toggle('Show Info',True)
|
380 |
+
if show_info:
|
381 |
+
display_info()
|
382 |
st.subheader("Customization Options")
|
383 |
# Customization options
|
384 |
input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
|
|
|
396 |
extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
|
397 |
with col2:
|
398 |
enable_feedback_mode = st.toggle("Enable Feedback Mode",False)
|
399 |
+
use_t5_small = st.toggle("Use T5-Small",False)
|
400 |
# set_state(session_id, 'generated_questions', state['generated_questions'])
|
401 |
+
if use_t5_small is True:
|
402 |
+
model, tokenizer = load_model('AneriThakkar/flan-t5-small-finetuned')
|
403 |
text = None
|
404 |
if input_type == "Text Input":
|
405 |
text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")
|
|
|
488 |
# Export buttons
|
489 |
# if st.session_state.generated_questions:
|
490 |
if state['generated_questions']:
|
491 |
+
with st.sidebar:
|
492 |
csv_data = export_to_csv(state['generated_questions'])
|
493 |
st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
|
494 |
|
495 |
pdf_data = export_to_pdf(state['generated_questions'])
|
496 |
st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
|
497 |
+
|
498 |
with st.expander("View Visualizations"):
|
499 |
questions = [tpl['question'] for tpl in state['generated_questions']]
|
500 |
overall_scores = [tpl['overall_score'] for tpl in state['generated_questions']]
|