# Spaces: Sleeping  (page-status text captured with the source; not part of the program)
import streamlit as st | |
import subprocess | |
import sys | |
# Install pdfplumber and transformers if they are not already installed
def _pip_install(package):
    """Install *package* with pip into the running interpreter's environment.

    Args:
        package: Distribution name passed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import importlib  # local import: only needed on the install path

    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    # Packages installed while the interpreter is running may not be visible
    # to the import system until its finder caches are cleared.
    importlib.invalidate_caches()


try:
    import pdfplumber
except ModuleNotFoundError:
    _pip_install("pdfplumber")
    import pdfplumber  # Re-import after installation

try:
    from transformers import pipeline
except ModuleNotFoundError:
    _pip_install("transformers")
    from transformers import pipeline  # Re-import after installation
# Function to extract text from PDFs using pdfplumber
def extract_text_from_pdfs(pdf_files):
    """Extract the text of every page of each given PDF.

    Args:
        pdf_files: Iterable of objects that ``pdfplumber.open`` accepts and
            that expose a ``name`` attribute (used as the result key).

    Returns:
        dict: Maps each file's ``name`` to the text of all its pages, joined
        with newlines. Pages without extractable text contribute an empty
        string. Files sharing the same ``name`` overwrite one another.
    """
    pdf_texts = {}
    for pdf_file in pdf_files:
        with pdfplumber.open(pdf_file) as pdf:
            # extract_text() may return None for a page with no text layer
            # (e.g. image-only pages on some pdfplumber releases); treat that
            # as empty text instead of failing on str + None.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        # Join with a newline so the last word of one page does not fuse with
        # the first word of the next.
        pdf_texts[pdf_file.name] = "\n".join(page_texts)
    return pdf_texts
# Load pre-trained QA model
def _load_qa_pipeline():
    """Build the question-answering pipeline used by the app."""
    return pipeline(
        'question-answering',
        model='distilbert-base-uncased-distilled-squad',
    )


# Streamlit re-executes this script on every user interaction, so an uncached
# load would rebuild the model each time. st.cache_resource keeps one shared
# instance across reruns; Streamlit releases without it fall back to the
# plain (uncached) loader.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_qa_pipeline = _cache_resource(_load_qa_pipeline)

qa_pipeline = _load_qa_pipeline()
# Function to answer questions based on extracted text
def answer_question(pdf_texts, question):
    """Answer *question* using the combined text of all PDFs as context.

    Args:
        pdf_texts: Mapping of PDF name to its extracted text.
        question: The question to ask.

    Returns:
        str: The ``'answer'`` field of the QA pipeline result, or an
        explanatory message when the PDFs contain no usable text.
    """
    # Skip empty/None entries so they do not add stray separators.
    context = " ".join(text for text in pdf_texts.values() if text)
    if not context.strip():
        # The QA pipeline cannot work with an empty context (e.g. scanned
        # PDFs without a text layer); report that instead of failing.
        return "No extractable text was found in the uploaded PDFs."
    result = qa_pipeline(question=question, context=context)
    return result['answer']
# Streamlit application: upload PDFs, ask a question, show the model's answer.
st.title("PDF Question Answering App")

# File uploader for PDF files
uploaded_files = st.file_uploader(
    "Upload PDF files",
    type="pdf",
    accept_multiple_files=True,
)

if not uploaded_files:
    # Nothing uploaded yet: prompt the user and render nothing else.
    st.write("Please upload PDF files to continue.")
else:
    # Extract text from PDFs
    pdf_texts = extract_text_from_pdfs(uploaded_files)
    st.write("PDFs Uploaded Successfully!")

    # Question input, followed by the button that triggers answering
    question = st.text_input("Enter your question:")
    get_answer_clicked = st.button("Get Answer")

    if get_answer_clicked and question:
        answer = answer_question(pdf_texts, question)
        st.write(f"Answer: {answer}")
    elif get_answer_clicked:
        st.write("Please enter a question.")