"""Streamlit app: upload PDF files and ask questions about their text."""

import glob
import os

import fitz  # PyMuPDF
import streamlit as st
from transformers import pipeline


# Function to extract text from PDFs
def extract_text_from_pdfs(pdf_files):
    """Extract the text of every page of each given PDF.

    Args:
        pdf_files: Iterable of PDF sources. Each item is either a filesystem
            path (``str`` / ``os.PathLike``) or a binary file-like object
            such as the objects returned by ``st.file_uploader``.

    Returns:
        dict mapping each source to its concatenated page text. Path inputs
        are keyed by the path itself; file-like inputs are keyed by their
        ``name`` attribute (falling back to ``repr`` of the object).
    """
    pdf_texts = {}
    for pdf_file in pdf_files:
        if isinstance(pdf_file, (str, os.PathLike)):
            key = pdf_file
            doc = fitz.open(pdf_file)
        else:
            # Uploaded files live in memory, not on disk, so their ``name``
            # is not a usable path; hand the raw bytes to PyMuPDF instead.
            key = getattr(pdf_file, "name", repr(pdf_file))
            if hasattr(pdf_file, "getvalue"):
                # getvalue() ignores the current read position, so it still
                # returns everything if the buffer was already read once.
                data = pdf_file.getvalue()
            else:
                data = pdf_file.read()
            doc = fitz.open(stream=data, filetype="pdf")
        with doc:
            pdf_texts[key] = "".join(page.get_text() for page in doc)
    return pdf_texts


# Streamlit re-executes this script on every widget interaction; cache the
# model so it is built only once. Older Streamlit releases do not provide
# ``cache_resource``, in which case the loader simply runs uncached.
_cache_resource = getattr(st, "cache_resource", None) or (lambda func: func)


@_cache_resource
def _load_qa_pipeline():
    """Build the pre-trained extractive question-answering pipeline."""
    return pipeline(
        'question-answering',
        model='distilbert-base-uncased-distilled-squad',
    )


# Load pre-trained QA model
qa_pipeline = _load_qa_pipeline()


# Function to answer questions based on extracted text
def answer_question(pdf_texts, question):
    """Answer ``question`` using the combined text of all PDFs as context.

    Args:
        pdf_texts: dict of extracted text, as returned by
            ``extract_text_from_pdfs``.
        question: The question to ask.

    Returns:
        The ``'answer'`` field of the QA pipeline result.

    Raises:
        ValueError: If the combined text is empty or only whitespace.
    """
    context = " ".join(pdf_texts.values())
    if not context.strip():
        raise ValueError("No text could be extracted from the provided PDFs.")
    result = qa_pipeline(question=question, context=context)
    return result['answer']


# Streamlit application
st.title("PDF Question Answering App")

# File uploader for PDF files
uploaded_files = st.file_uploader(
    "Upload PDF files", type="pdf", accept_multiple_files=True
)

# Display uploaded files
if uploaded_files:
    # Extract text from PDFs (the upload objects themselves, not their names)
    pdf_texts = extract_text_from_pdfs(uploaded_files)
    st.write("PDFs Uploaded Successfully!")

    # Question input
    question = st.text_input("Enter your question:")

    if st.button("Get Answer"):
        if not question.strip():
            st.write("Please enter a question.")
        elif not any(text.strip() for text in pdf_texts.values()):
            # e.g. scanned PDFs that contain only images
            st.write("No text could be extracted from the uploaded PDFs.")
        else:
            answer = answer_question(pdf_texts, question)
            st.write(f"Answer: {answer}")
else:
    st.write("Please upload PDF files to continue.")