|
from langchain.chains import RetrievalQA |
|
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler |
|
from langchain.callbacks.manager import CallbackManager |
|
|
|
|
|
from langchain_community.vectorstores import Chroma |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.document_loaders import PyPDFLoader |
|
from langchain.prompts import PromptTemplate |
|
from langchain.memory import ConversationBufferMemory |
|
import streamlit as st |
|
import os |
|
import time |
|
from langchain_community.llms import HuggingFaceEndpoint |
|
|
|
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
|
# Embedding model configuration.
# `embeddings` is the shared embedding function handed to the Chroma vector
# store further down in this script.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device='cpu')  # device setting passed to the model
encode_kwargs = dict(normalize_embeddings=False)  # encode-time option

embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)
|
|
|
# Working directories used by this script:
#   'files' - uploaded PDFs are written here,
#   'jj'    - directory handed to Chroma as its persist_directory.
# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of testing os.path.exists() before calling os.mkdir().
for _directory in ('files', 'jj'):
    os.makedirs(_directory, exist_ok=True)
|
|
|
# Default prompt text. Its three placeholders ({context}, {history},
# {question}) match the input_variables declared on the PromptTemplate below.
_DEFAULT_TEMPLATE = """You are a knowledgeable chatbot, here to help with questions of the user. Your tone should be professional and informative.Try to give answer in tabular and shortcut.

Context: {context}
History: {history}

User: {question}
Chatbot:"""

# Both values are stored in session state only when absent, so an existing
# entry is never overwritten on a later script run.
if 'template' not in st.session_state:
    st.session_state['template'] = _DEFAULT_TEMPLATE

if 'prompt' not in st.session_state:
    st.session_state['prompt'] = PromptTemplate(
        template=st.session_state['template'],
        input_variables=["history", "context", "question"],
    )
|
# Conversation memory, created once per session. Its keys line up with the
# prompt placeholders: memory_key="history", input_key="question".
# NOTE(review): return_messages=True is combined with a plain-text
# PromptTemplate; confirm that message objects (rather than a formatted
# string) are what should be substituted for {history}.
if 'memory' not in st.session_state:
    st.session_state['memory'] = ConversationBufferMemory(
        input_key="question",
        memory_key="history",
        return_messages=True,
    )

# Vector store opened on the 'jj' directory with the shared embedding function.
if 'vectorstore' not in st.session_state:
    st.session_state['vectorstore'] = Chroma(
        embedding_function=embeddings,
        persist_directory='jj',
    )
|
|
|
# Hosted LLM used to answer questions; created once and cached in session
# state so it is not rebuilt on every script run.
# Fix: the keyword was spelled `Temperature`. The endpoint's sampling field is
# the lowercase `temperature`, so the capitalised spelling did not set it.
if 'llm' not in st.session_state:
    st.session_state.llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.9,
    )
|
|
|
|
|
# Transcript of the conversation: a list of {"role": ..., "message": ...}
# dicts, kept in session state.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

# Page header and the PDF upload widget (file type restricted to 'pdf').
st.title("PDF Chatbot")
uploaded_file = st.file_uploader("Upload your PDF", type='pdf')

# Re-draw every stored message inside a chat bubble for its role.
for entry in st.session_state['chat_history']:
    bubble = st.chat_message(entry["role"])
    with bubble:
        st.markdown(entry["message"])
|
|
|
# Main flow: once a PDF is uploaded, ingest it (first time only), build the
# retrieval-QA chain, and answer chat input; otherwise ask for an upload.
if uploaded_file is not None:
    # Build the on-disk path once instead of three times. basename() strips
    # any directory components from the client-supplied file name so the
    # write cannot land outside 'files/'. The extra ".pdf" suffix is kept
    # so files saved by earlier runs are still recognised.
    pdf_path = "files/" + os.path.basename(uploaded_file.name) + ".pdf"

    # The saved file doubles as the "already ingested" marker.
    # NOTE(review): if loading/embedding fails after the file is written,
    # the marker remains and ingestion is skipped on the next run.
    if not os.path.isfile(pdf_path):
        with st.status("Analyzing your document..."):
            # Context manager closes the handle even if the write raises.
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(uploaded_file.read())

            data = PyPDFLoader(pdf_path).load()

            # Split into chunks of up to 1500 characters, no overlap.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1500,
                chunk_overlap=0,
                length_function=len,
            )
            all_splits = text_splitter.split_documents(data)

            # Fix: pass persist_directory so the new store is tied to the
            # same 'jj' directory that the session's initial vector store
            # is opened from; previously no directory was given here.
            st.session_state.vectorstore = Chroma.from_documents(
                documents=all_splits,
                embedding=embeddings,
                persist_directory='jj',
            )
            st.session_state.vectorstore.persist()

        # Fix: a chain cached earlier still holds the retriever of the
        # previous vector store; drop it so it is rebuilt below.
        if 'qa_chain' in st.session_state:
            del st.session_state['qa_chain']

    st.session_state.retriever = st.session_state.vectorstore.as_retriever()

    # Build the QA chain once per session (or again after a new ingest).
    if 'qa_chain' not in st.session_state:
        st.session_state.qa_chain = RetrievalQA.from_chain_type(
            llm=st.session_state.llm,
            chain_type='stuff',
            retriever=st.session_state.retriever,
            verbose=True,
            chain_type_kwargs={
                "verbose": True,
                "prompt": st.session_state.prompt,
                "memory": st.session_state.memory,
            },
        )

    if user_input := st.chat_input("You:", key="user_input"):
        user_message = {"role": "user", "message": user_input}
        st.session_state.chat_history.append(user_message)
        with st.chat_message("user"):
            st.markdown(user_input)

        with st.chat_message("assistant"):
            with st.spinner("Assistant is typing..."):
                response = st.session_state.qa_chain(user_input)

            # Reveal the answer word by word to simulate typing.
            message_placeholder = st.empty()
            full_response = ""
            for chunk in response['result'].split():
                full_response += chunk + " "
                time.sleep(0.05)
                # NOTE(review): the trailing "β" looks like a mis-encoded
                # typing-cursor glyph - confirm the intended character.
                message_placeholder.markdown(full_response + "β")
            # Final render without the cursor character.
            message_placeholder.markdown(full_response)

        chatbot_message = {"role": "assistant", "message": response['result']}
        st.session_state.chat_history.append(chatbot_message)

else:
    st.write("Please upload a PDF file.")