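# NOTE: the following page-header text was captured along with this file and is
# not part of the program: "jarif's picture", "Update app.py",
# "7e9669e verified", "raw", "history blame", "5.42 kB".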
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
import os
# Load environment variables from .env file
load_dotenv()

# Fetch the Google API key from the environment. It may be None when the key is
# not configured; main() reports that to the user instead of failing here.
api_key = os.getenv("GOOGLE_API_KEY")

# Set the page configuration for the Streamlit app
st.set_page_config(page_title="DocWizard Instant Insights and Analysis", layout="wide")

# Header and Instructions (rendered as Markdown, so the text stays at column 0
# to avoid being interpreted as an indented code block).
st.markdown("""
## Document Intelligence Explorer 🤖
This chatbot utilizes the Retrieval-Augmented Generation (RAG) framework with Google's Generative AI model Gemini-PRO. It processes uploaded PDF documents by segmenting them into chunks, creating a searchable vector store, and generating precise answers to your questions. This method ensures high-quality, contextually relevant responses for an efficient user experience.
### How It Works
1. **Upload Your Documents**: You can upload multiple PDF files simultaneously for comprehensive analysis.
2. **Ask a Question**: After processing the documents, type your question related to the content of your uploaded documents for a detailed answer.
""")
def get_pdf_text(pdf_docs):
    """
    Extract text from uploaded PDF documents.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.
            ``None`` or an empty iterable is treated as "no documents".

    Returns:
        The text of every page of every document, concatenated in order with
        no separator. Pages for which ``extract_text()`` returns nothing are
        skipped. Returns "" when there are no documents.
    """
    # Lazily walk every page of every document; nothing is read (and
    # PdfReader is never invoked) when there are no documents.
    page_texts = (
        page.extract_text()
        for pdf in (pdf_docs or ())
        for page in PdfReader(pdf).pages
    )
    # A single join avoids repeated string concatenation in a loop.
    return "".join(page_text for page_text in page_texts if page_text)
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """
    Split text into manageable chunks for processing.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.

    Returns:
        A list of text chunks; an empty list when ``text`` is empty or None.
    """
    if not text:
        # Nothing to split; skip constructing the splitter.
        return []
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
def get_vector_store(text_chunks, api_key):
    """
    Create a FAISS vector store from text chunks and save it to "faiss_index".

    The outcome is reported to the user through ``st.success`` / ``st.error``
    and also returned so callers can react to a failure.

    Args:
        text_chunks: Text chunks to embed and index.
        api_key: Google API key passed to the embedding model.

    Returns:
        True if the index was built and saved, False otherwise.
    """
    if not text_chunks:
        # Nothing to embed (e.g. no text could be extracted from the PDFs).
        # Say so explicitly instead of surfacing an opaque library error.
        st.error("Error creating FAISS index: no text was found in the uploaded documents.")
        return False
    try:
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local("faiss_index")
    except Exception as e:
        # UI boundary: embedding/network/disk failures are shown to the user
        # rather than crashing the app.
        st.error(f"Error creating FAISS index: {e}")
        return False
    st.success("FAISS index created and saved successfully.")
    return True
def get_conversational_chain(api_key):
    """
    Build the question-answering chain backed by the "gemini-pro" chat model.

    The chain uses the "stuff" chain type with a prompt that takes two
    variables, ``context`` and ``question``, and instructs the model to answer
    only from the supplied context.
    """
    # The template text is kept at column 0 so the prompt string is unchanged.
    qa_template = """
Answer the question as detailed as possible from the provided context. If the answer is not in the provided context,
say "Answer is not available in the context". Do not provide incorrect information.\n\n
Context:\n{context}\n
Question:\n{question}\n
Answer:
"""
    llm = ChatGoogleGenerativeAI(
        model="gemini-pro",
        temperature=0.3,
        google_api_key=api_key,
    )
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
def user_input(user_question, api_key):
    """
    Handle user input and generate a response from the chatbot.

    Loads the saved "faiss_index" vector store, retrieves the documents most
    similar to the question, runs the question-answering chain over them and
    writes the reply to the page. Any failure is shown with ``st.error``
    instead of propagating.

    Args:
        user_question: The question typed by the user.
        api_key: Google API key used for the embeddings and the chat model.
    """
    if not os.path.isdir("faiss_index"):
        # No index has been saved yet, so there is nothing to search.
        st.error(
            "Error loading FAISS index or generating response: no index found. "
            "Please upload and process your PDF files first."
        )
        return
    try:
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
        # NOTE(review): allow_dangerous_deserialization=True opts in to
        # pickle-based loading. The index is expected to be one this app saved
        # itself; never point this at an untrusted directory.
        new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
        docs = new_db.similarity_search(user_question)
        chain = get_conversational_chain(api_key)
        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
    except Exception as e:
        # UI boundary: loading, retrieval and generation can fail with more
        # than ValueError (I/O, network, API errors), so report them all.
        st.error(f"Error loading FAISS index or generating response: {e}")
        return
    st.write("Reply:", response["output_text"])
def main():
    """
    Main function to run the Streamlit app.

    The main area holds the question box and the "Generate Text" button; the
    sidebar holds the PDF uploader and the "Submit & Process" button that
    extracts text, chunks it and builds the vector index.
    """
    st.header("AI Assistant 🤖")
    user_question = st.text_input("Ask a Question from the PDF Files", key="user_question")
    if st.button("Generate Text", key="generate_button"):  # Add a button to generate text
        if not api_key:
            # Same check as the processing path; no return here so the sidebar
            # below is still rendered.
            st.error("Google API key is missing. Please add it to the .env file.")
        elif user_question:  # Trigger user input function only if there's a question
            with st.spinner("Generating result..."):  # Display spinner while generating
                user_input(user_question, api_key)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
            key="pdf_uploader",
        )
        if st.button("Submit & Process", key="process_button"):
            if not api_key:
                st.error("Google API key is missing. Please add it to the .env file.")
                return
            if not pdf_docs:
                st.error("No PDF files uploaded. Please upload at least one PDF file to proceed.")
                return
            with st.spinner("Processing..."):
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                index_status = get_vector_store(text_chunks, api_key)
                # get_vector_store shows its own error messages. Only an
                # explicit False withholds the completion message; a None
                # return (no status reported) is treated as success.
                if index_status is not False:
                    st.success("Processing complete. You can now ask questions based on the uploaded documents.")


if __name__ == "__main__":
    main()