Spaces:

jarif
/

QueryMaster-LangChain-RAG-Assistant

Sleeping

App Files Files Community

QueryMaster-LangChain-RAG-Assistant / app.py

jarif

Update app.py

7e9669e verified 3 months ago

raw

history blame

5.42 kB

	import streamlit as st
	from PyPDF2 import PdfReader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_google_genai import GoogleGenerativeAIEmbeddings
	import google.generativeai as genai
	from langchain.vectorstores import FAISS
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain.chains.question_answering import load_qa_chain
	from langchain.prompts import PromptTemplate
	from dotenv import load_dotenv
	import os

	# Load environment variables from a local .env file (if present) into the
	# process environment so os.getenv() below can see them.
	load_dotenv()

	# Read the Google API key from the environment. os.getenv returns None when
	# GOOGLE_API_KEY is not set; main() checks for that before processing PDFs.
	api_key = os.getenv("GOOGLE_API_KEY")

	# Configure the browser tab title and use the wide page layout.
	# NOTE(review): Streamlit has historically required set_page_config to be the
	# first st.* call in the script - keep it ahead of st.markdown below.
	st.set_page_config(page_title="DocWizard Instant Insights and Analysis", layout="wide")

	# Static page header and usage instructions, rendered once at the top of the
	# page on every script run.
	st.markdown("""
	## Document Intelligence Explorer 🤖

	This chatbot utilizes the Retrieval-Augmented Generation (RAG) framework with Google's Generative AI model Gemini-PRO. It processes uploaded PDF documents by segmenting them into chunks, creating a searchable vector store, and generating precise answers to your questions. This method ensures high-quality, contextually relevant responses for an efficient user experience.

	### How It Works

	1. Upload Your Documents: You can upload multiple PDF files simultaneously for comprehensive analysis.
	2. Ask a Question: After processing the documents, type your question related to the content of your uploaded documents for a detailed answer.
	""")

	def get_pdf_text(pdf_docs):
	    """
	    Extract and concatenate the text of every page of the uploaded PDFs.

	    Args:
	        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (the
	            objects returned by the Streamlit file uploader). ``None`` or an
	            empty iterable is treated as "no documents".

	    Returns:
	        A single string with the text of all pages, in upload and page order,
	        joined without a separator. Pages for which ``extract_text()``
	        returns nothing are skipped. Returns ``""`` when there is no input or
	        no extractable text.
	    """
	    page_texts = []
	    # `or ()` lets a missing upload (None) behave like an empty upload
	    # instead of raising TypeError on iteration.
	    for pdf in pdf_docs or ():
	        for page in PdfReader(pdf).pages:
	            page_text = page.extract_text()
	            if page_text:
	                page_texts.append(page_text)
	    # Join once at the end rather than growing a string with `+=` per page.
	    return "".join(page_texts)

	def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
	    """
	    Split text into overlapping chunks for embedding.

	    Args:
	        text: The full document text to split.
	        chunk_size: Maximum size of each chunk, passed to
	            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
	        chunk_overlap: Overlap between consecutive chunks, passed to
	            ``RecursiveCharacterTextSplitter``. Defaults to 1000.

	    Returns:
	        A list of chunk strings; an empty list when ``text`` is empty or
	        ``None``.
	    """
	    # Nothing to split: avoid constructing a splitter and make the
	    # "no text" case explicit for callers.
	    if not text:
	        return []
	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	    )
	    return text_splitter.split_text(text)

	def get_vector_store(text_chunks, api_key):
	    """
	    Create a FAISS vector store from text chunks and save it to disk.

	    The chunks are embedded with the Google ``models/embedding-001`` model
	    and the resulting index is written to the local ``faiss_index`` folder.
	    Progress and errors are reported in the Streamlit UI.

	    Args:
	        text_chunks: List of text chunks to embed and index.
	        api_key: Google API key passed to the embeddings client.

	    Returns:
	        ``True`` if the index was created and saved, ``False`` otherwise
	        (an error message has already been shown in that case).
	    """
	    # An empty chunk list cannot be indexed; report it clearly instead of
	    # surfacing whatever error the vector store library raises for it.
	    if not text_chunks:
	        st.error("Error creating FAISS index: no text chunks to index.")
	        return False

	    try:
	        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
	        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
	        vector_store.save_local("faiss_index")
	    except Exception as e:
	        # UI boundary: show the failure to the user rather than crashing the
	        # app, and tell the caller so it does not report success.
	        st.error(f"Error creating FAISS index: {e}")
	        return False

	    st.success("FAISS index created and saved successfully.")
	    return True

	def get_conversational_chain(api_key):
	    """
	    Build the question-answering chain backed by the Gemini-PRO chat model.

	    The chain uses the "stuff" strategy with a prompt that takes the
	    ``context`` and ``question`` variables and instructs the model to answer
	    only from the supplied context.

	    Args:
	        api_key: Google API key passed to the chat model client.

	    Returns:
	        The chain object produced by ``load_qa_chain``.
	    """
	    qa_template = """
	Answer the question as detailed as possible from the provided context. If the answer is not in the provided context,
	say "Answer is not available in the context". Do not provide incorrect information.\n\n
	Context:\n{context}\n
	Question:\n{question}\n
	Answer:
	"""
	    llm = ChatGoogleGenerativeAI(
	        model="gemini-pro",
	        temperature=0.3,
	        google_api_key=api_key,
	    )
	    qa_prompt = PromptTemplate(
	        template=qa_template,
	        input_variables=["context", "question"],
	    )
	    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)

	def user_input(user_question, api_key):
	    """
	    Answer a user question from the saved FAISS index and display the reply.

	    Loads the index from the local ``faiss_index`` folder, retrieves the
	    chunks most similar to the question, runs the question-answering chain
	    over them and writes the answer to the Streamlit page. Any failure is
	    shown as an error message in the UI instead of being raised.

	    Args:
	        user_question: The question typed by the user.
	        api_key: Google API key used for both the embeddings client and the
	            chat model.

	    Returns:
	        None.
	    """
	    # Asking before any documents were processed would otherwise fail deep
	    # inside the index loader with an unhelpful message.
	    if not os.path.exists("faiss_index"):
	        st.error(
	            "Error loading FAISS index or generating response: no index found. "
	            "Please upload and process your PDF files first."
	        )
	        return

	    try:
	        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
	        # NOTE(security): allow_dangerous_deserialization=True unpickles the
	        # index metadata. This is only acceptable because the index is
	        # written locally by this app; never point it at an index from an
	        # untrusted source.
	        new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
	        docs = new_db.similarity_search(user_question)
	        chain = get_conversational_chain(api_key)
	        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
	    except Exception as e:
	        # UI boundary: index loading and the remote model calls can fail with
	        # more than ValueError (I/O errors, API errors); surface them all to
	        # the user rather than crashing the app with a traceback.
	        st.error(f"Error loading FAISS index or generating response: {e}")
	        return

	    st.write("Reply:", response["output_text"])

	def main():
	    """
	    Main function to run the Streamlit app.

	    Renders the question box and "Generate Text" button in the main area,
	    and the PDF uploader with its "Submit & Process" button in the sidebar.
	    Processing extracts text from the uploaded PDFs, splits it into chunks
	    and builds the FAISS index that questions are answered from.
	    """
	    st.header("AI Assistant 🤖")

	    user_question = st.text_input("Ask a Question from the PDF Files", key="user_question")

	    if st.button("Generate Text", key="generate_button"):
	        if user_question and user_question.strip():
	            with st.spinner("Generating result..."):
	                user_input(user_question, api_key)
	        else:
	            # Give feedback instead of silently ignoring the click.
	            st.warning("Please enter a question first.")

	    with st.sidebar:
	        st.title("Menu:")
	        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True, key="pdf_uploader")

	        if st.button("Submit & Process", key="process_button"):
	            if not api_key:
	                st.error("Google API key is missing. Please add it to the .env file.")
	                return

	            if not pdf_docs:
	                st.error("No PDF files uploaded. Please upload at least one PDF file to proceed.")
	                return

	            with st.spinner("Processing..."):
	                raw_text = get_pdf_text(pdf_docs)
	                text_chunks = get_text_chunks(raw_text)
	                if not text_chunks:
	                    # E.g. scanned/image-only PDFs: there is nothing to index.
	                    st.error("No extractable text was found in the uploaded PDF files.")
	                    return

	                index_result = get_vector_store(text_chunks, api_key)
	                # Only claim success when index creation did not explicitly
	                # report failure (a False return); any other return value,
	                # including None, is treated as success.
	                if index_result is not False:
	                    st.success("Processing complete. You can now ask questions based on the uploaded documents.")


	# Run the app only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()