"""Streamlit app that answers questions about the text of uploaded PDF files."""

import subprocess
import sys

import streamlit as st
from transformers import pipeline

# Best-effort bootstrap: install pdfplumber on first run if it is missing.
# NOTE(review): installing packages at import time is fragile (no network,
# read-only environments); prefer declaring pdfplumber in requirements.txt.
try:
    import pdfplumber
except ModuleNotFoundError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pdfplumber"])
    import pdfplumber  # Re-import after installation


def extract_text_from_pdfs(pdf_files) -> dict[str, str]:
    """Extract the text of every uploaded PDF with pdfplumber.

    Args:
        pdf_files: Iterable of file-like objects that pdfplumber can open and
            that expose a ``name`` attribute (as the objects returned by
            ``st.file_uploader`` are used here).

    Returns:
        A mapping from each file's ``name`` to the text of all of its pages,
        joined with newlines. Files that share a name overwrite one another,
        so only the last one is kept.
    """
    pdf_texts = {}
    for pdf_file in pdf_files:
        with pdfplumber.open(pdf_file) as pdf:
            # extract_text() may yield None for pages without a text layer
            # (e.g. scanned images); treat those pages as empty rather than
            # crashing on ``str + None``. Joining with a newline keeps the
            # last word of one page from fusing with the first of the next.
            pdf_texts[pdf_file.name] = "\n".join(
                page.extract_text() or "" for page in pdf.pages
            )
    return pdf_texts


@st.cache_resource
def load_qa_pipeline():
    """Build the question-answering pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be rebuilt each time.
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",
    )


# Pre-trained QA model, shared by every rerun through the cached loader.
qa_pipeline = load_qa_pipeline()


def answer_question(pdf_texts, question: str) -> str:
    """Answer ``question`` using the combined text of all PDFs as context.

    Args:
        pdf_texts: Mapping of file name to extracted text, as returned by
            ``extract_text_from_pdfs``.
        question: The question to answer.

    Returns:
        The ``'answer'`` field of the pipeline result.

    Raises:
        ValueError: If the combined text is empty or whitespace only, since
            there is nothing for the model to search.
    """
    context = " ".join(pdf_texts.values())
    if not context.strip():
        raise ValueError("No text could be extracted from the uploaded PDFs.")
    result = qa_pipeline(question=question, context=context)
    return result["answer"]


# Streamlit application
st.title("PDF Question Answering App")

# File uploader for PDF files
uploaded_files = st.file_uploader(
    "Upload PDF files", type="pdf", accept_multiple_files=True
)

if uploaded_files:
    # Extract text from PDFs
    pdf_texts = extract_text_from_pdfs(uploaded_files)
    st.write("PDFs Uploaded Successfully!")

    # Question input
    question = st.text_input("Enter your question:")

    if st.button("Get Answer"):
        if not question or not question.strip():
            st.write("Please enter a question.")
        elif not any(text.strip() for text in pdf_texts.values()):
            # Nothing to search (e.g. image-only PDFs): report it instead of
            # letting the model call fail.
            st.write("No extractable text was found in the uploaded PDFs.")
        else:
            answer = answer_question(pdf_texts, question)
            st.write(f"Answer: {answer}")
else:
    st.write("Please upload PDF files to continue.")