import os
import tempfile

import gradio as gr
import streamlit as st
import torch
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import (
    CSVLoader,
    DirectoryLoader,
    GitLoader,
    NotebookLoader,
    OnlinePDFLoader,
    PyPDFLoader,
    PythonLoader,
    TextLoader,
    UnstructuredEPubLoader,
    UnstructuredFileLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
    UnstructuredODTLoader,
    UnstructuredPDFLoader,
    UnstructuredPowerPointLoader,
    UnstructuredWordDocumentLoader,
    WebBaseLoader,
)
from langchain.document_loaders.base import BaseLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
from langchain.llms import CTransformers, HuggingFaceHub, HuggingFacePipeline
from langchain.memory import ConversationBufferMemory, ConversationTokenBufferMemory
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig,
    StoppingCriteria,
    StoppingCriteriaList,
    TextStreamer,
    pipeline,
)

# Maps a lower-cased file extension (without the dot) to a
# ``(loader_class, loader_kwargs)`` pair. The loader is instantiated as
# ``loader_class(file_path, **loader_kwargs)``.
# Add more mappings for other file extensions and loaders as needed.
FILE_LOADER_MAPPING = {
    "csv": (CSVLoader, {"encoding": "utf-8"}),
    "doc": (UnstructuredWordDocumentLoader, {}),
    "docx": (UnstructuredWordDocumentLoader, {}),
    "epub": (UnstructuredEPubLoader, {}),
    "html": (UnstructuredHTMLLoader, {}),
    "md": (UnstructuredMarkdownLoader, {}),
    "odt": (UnstructuredODTLoader, {}),
    "pdf": (PyPDFLoader, {}),
    "ppt": (UnstructuredPowerPointLoader, {}),
    "pptx": (UnstructuredPowerPointLoader, {}),
    "txt": (TextLoader, {"encoding": "utf8"}),
    "ipynb": (NotebookLoader, {}),
    "py": (PythonLoader, {}),
}


def load_model(
    model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q8_0.gguf",
):
    """Build the CTransformers language model used by the QA chain.

    Generated tokens are streamed to stdout through a
    ``StreamingStdOutCallbackHandler``.

    Args:
        model: Model repository id or local path handed to ``CTransformers``.
        model_file: Name of the weights file to load from ``model``.

    Returns:
        The configured ``CTransformers`` instance.
    """
    llm = CTransformers(
        model=model,
        model_file=model_file,
        callbacks=[StreamingStdOutCallbackHandler()],
    )
    return llm
def get_file_extension(filename):
    """Return the lower-cased extension of *filename* without the leading dot.

    Returns an empty string when the name has no extension (including
    dot-files such as ``.bashrc``).
    """
    return os.path.splitext(filename)[-1][1:].lower()


def create_vector_database(loaded_documents, persist_directory=None):
    """Create a Chroma vector database from already-loaded documents.

    The documents are split into chunks of 500 characters with an overlap
    of 40, embedded with ``HuggingFaceBgeEmbeddings`` ("BAAI/bge-large-en")
    and stored in a Chroma collection.

    Args:
        loaded_documents: Documents to index, as returned by the loaders'
            ``load()`` calls.
        persist_directory: Optional directory handed to Chroma. When it is
            ``None`` (the default) ``persist()`` is not called.

    Returns:
        The populated ``Chroma`` vector store.

    Raises:
        ValueError: If splitting the documents yields no chunks to index.
    """
    # Split loaded documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=40, length_function=len
    )
    chunked_documents = text_splitter.split_documents(loaded_documents)
    if not chunked_documents:
        raise ValueError("No document content available to index.")

    # Initialize HuggingFace embeddings
    embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-large-en")

    # Create a Chroma vector database from the chunked documents
    db = Chroma.from_documents(
        documents=chunked_documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Only persist when the caller asked for an on-disk store.
    if persist_directory is not None:
        db.persist()
    return db


def set_custom_prompt_condense():
    """Return the prompt that rewrites a follow-up into a standalone question.

    The template takes ``chat_history`` and ``question`` as inputs.
    """
    _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
    return CONDENSE_QUESTION_PROMPT


def set_custom_prompt():
    """Return the answer prompt used when combining retrieved documents.

    The template takes ``context`` (the retrieved text) and ``question``.
    """
    prompt_template = """
Important:
Answer with the facts listed in the list of sources below. If there isn't enough information below, say you don't know.
If asking a clarifying question to the user would help, ask the question.
ALWAYS return a "SOURCES" part in your answer, except for small-talk conversations.

Question: {question}

{context}

Question: {question}
Helpful Answer:

---------------------------
---------------------------
Sources:
"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    return prompt


def create_chain(llm, prompt, CONDENSE_QUESTION_PROMPT, db):
    """Create a ConversationalRetrievalChain over the given vector store.

    The chain uses the "stuff" chain type, retrieves the top 3 results
    (k=3), returns the source documents alongside the answer, and keeps
    the conversation in a ``ConversationTokenBufferMemory`` under the
    ``chat_history`` key.

    Args:
        llm: The language model used by the chain and by the memory.
        prompt: Prompt template passed to the combine-documents chain.
        CONDENSE_QUESTION_PROMPT: Prompt template used to condense a
            follow-up question into a standalone question.
        db: The vector store to be used as the retriever.

    Returns:
        ConversationalRetrievalChain: The initialized conversational chain.
    """
    memory = ConversationTokenBufferMemory(
        llm=llm,
        memory_key="chat_history",
        return_messages=True,
        input_key="question",
        output_key="answer",
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
        max_tokens_limit=256,
        combine_docs_chain_kwargs={"prompt": prompt},
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        memory=memory,
    )
    return chain


def create_retrieval_qa_bot(loaded_documents):
    """Build the full QA chain (model, prompts, vector store) for documents.

    Args:
        loaded_documents: Documents to index and answer questions about.

    Returns:
        The chain produced by ``create_chain``.

    Raises:
        RuntimeError: If any construction step fails. The message names the
            failing step and the original exception is chained as the cause.
    """
    try:
        llm = load_model()
    except Exception as e:
        raise RuntimeError(f"Failed to load model: {str(e)}") from e

    try:
        prompt = set_custom_prompt()
    except Exception as e:
        raise RuntimeError(f"Failed to get prompt: {str(e)}") from e

    try:
        CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()
    except Exception as e:
        raise RuntimeError(f"Failed to get condense prompt: {str(e)}") from e

    try:
        db = create_vector_database(loaded_documents)
    except Exception as e:
        raise RuntimeError(f"Failed to get database: {str(e)}") from e

    try:
        qa = create_chain(
            llm=llm,
            prompt=prompt,
            CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT,
            db=db,
        )
    except Exception as e:
        raise RuntimeError(
            f"Failed to create retrieval QA chain: {str(e)}"
        ) from e

    return qa


def retrieve_bot_answer(query, loaded_documents):
    """Answer *query* from *loaded_documents* using a freshly built QA bot.

    A new bot (model, vector store and memory) is created on every call,
    so no chat history carries over between calls.

    Args:
        query (str): The question to be answered by the QA bot.
        loaded_documents: Documents to index and search for the answer.

    Returns:
        dict: The QA bot's response; guaranteed to contain an 'answer' key.

    Raises:
        KeyError: If the response does not contain an 'answer' key.
    """
    qa_bot_instance = create_retrieval_qa_bot(loaded_documents)
    bot_response = qa_bot_instance({"question": query})

    if "answer" not in bot_response:
        raise KeyError(
            "Expected 'answer' key in bot_response, but it was not found."
        )
    return bot_response


def _load_uploaded_documents(uploaded_files):
    """Write each uploaded file to a temp directory and load it.

    Files whose extension is not in ``FILE_LOADER_MAPPING`` are skipped
    with a warning. Returns the combined list of loaded documents.
    """
    loaded_documents = []
    # Loaders take a file path, so the uploads are staged on disk; the
    # directory is removed once every file has been loaded.
    with tempfile.TemporaryDirectory() as td:
        for uploaded_file in uploaded_files:
            st.write(f"Uploaded: {uploaded_file.name}")
            ext = get_file_extension(uploaded_file.name)
            st.write(f"Uploaded: {ext}")

            if ext not in FILE_LOADER_MAPPING:
                st.warning(f"Unsupported file extension: {ext}")
                continue

            loader_class, loader_args = FILE_LOADER_MAPPING[ext]

            # basename() keeps a crafted name from escaping the temp dir.
            file_path = os.path.join(td, os.path.basename(uploaded_file.name))
            with open(file_path, "wb") as temp_file:
                # getvalue() returns the full content regardless of the
                # stream position left by an earlier script run.
                temp_file.write(uploaded_file.getvalue())

            loader = loader_class(file_path, **loader_args)
            loaded_documents.extend(loader.load())
    return loaded_documents


def main():
    """Run the Streamlit app: upload documents, ask a question, show the answer."""
    st.title("Docuverse")

    # Offer exactly the extensions that have a loader registered.
    uploaded_files = st.file_uploader(
        "Upload your documents",
        type=sorted(FILE_LOADER_MAPPING),
        accept_multiple_files=True,
    )

    loaded_documents = []
    if uploaded_files:
        loaded_documents = _load_uploaded_documents(uploaded_files)

    st.write("Chat with the Document:")
    query = st.text_input("Ask a question:")

    if not st.button("Get Answer"):
        return
    if not query:
        st.warning("Please enter a question.")
        return
    if not loaded_documents:
        st.warning("Please upload at least one supported document.")
        return

    # retrieve_bot_answer builds the model, prompts and vector database
    # itself, so nothing is constructed here.
    try:
        response = retrieve_bot_answer(query, loaded_documents)
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        return

    st.write("Bot Response:")
    st.write(response["answer"])


if __name__ == "__main__":
    main()