Spaces: Runtime error

Use gradio for document answering

Files changed:
- .gitignore +1 -0
- app.py +130 -3
- llm_model.py +96 -0
- requirements.txt +12 -0
- streamlit_app.py +158 -0
- vector_db.py +46 -0
.gitignore
ADDED
@@ -0,0 +1 @@
.idea
app.py
CHANGED
@@ -1,4 +1,131 @@
-import

import gradio as gr
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language

import vector_db as vdb
from llm_model import LLMModel

chunk_size = 2000
chunk_overlap = 200
uploaded_docs = []
uploaded_df = gr.Dataframe(headers=["file_name", "content_length"])
upload_files_section = gr.Files(
    file_types=[".md", ".mdx", ".rst", ".txt"],
)
chatbot_stream = gr.Chatbot(bubble_full_width=False, show_copy_button=True)


def load_docs(files):
    all_docs = []
    all_qa = []
    for file in files:
        if file.name is not None:
            with open(file.name, "r") as f:
                file_content = f.read()
            file_name = file.name.split("/")[-1]
            # Create document with metadata
            doc = Document(page_content=file_content, metadata={"source": file_name})
            # Create an instance of the RecursiveCharacterTextSplitter class with specific parameters.
            # It splits text into chunks of `chunk_size` (2000) characters with a `chunk_overlap` (200) character overlap.
            language = get_language(file_name)
            text_splitter = RecursiveCharacterTextSplitter.from_language(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                language=language
            )
            # Split the text into chunks using the text splitter.
            doc_chunks = text_splitter.split_documents([doc])
            print(f"Number of chunks: {len(doc_chunks)}")
            # For each chunk, send to the LLM to get potential questions and answers
            for doc_chunk in doc_chunks:
                gr.Info("Analysing document...")
                potential_qa_from_doc = llm_model.get_potential_question_answer(doc_chunk.page_content)
                all_qa += [Document(page_content=potential_qa_from_doc, metadata=doc_chunk.metadata)]
            all_docs += doc_chunks
            uploaded_docs.append(file.name)
    vector_db.load_docs_into_vector_db(all_qa)
    gr.Info("Loaded document(s) into vector db.")

    return uploaded_docs


def get_language(file_name: str):
    if file_name.endswith(".md") or file_name.endswith(".mdx"):
        return Language.MARKDOWN
    elif file_name.endswith(".rst"):
        return Language.RST
    else:
        return Language.MARKDOWN


def get_vector_db():
    return vdb.VectorDB()


def get_llm_model(_db: vdb.VectorDB):
    retriever = _db.docs_db.as_retriever(search_kwargs={"k": 2})
    # return LLMModel(retriever=retriever).create_qa_chain()
    return LLMModel(retriever=retriever)


def predict(message, history):
    # resp = llm_model.answer_question_inference(message)
    # return resp.get("answer")
    resp = llm_model.answer_question_inference_text_gen(message)
    final_resp = ""
    for c in resp:
        final_resp += str(c)
        yield final_resp
    # start_time = time.time()
    # res = llm_model({"query": message})
    # sources = []
    # for source_docs in res['source_documents']:
    #     if 'source' in source_docs.metadata:
    #         sources.append(source_docs.metadata['source'])
    # # Display assistant response in chat message container
    # end_time = time.time()
    # time_taken = "{:.2f}".format(end_time - start_time)
    # format_answer = f"## Result\n\n{res['result']}\n\n### Sources\n\n{sources}\n\nTime taken: {time_taken}s"
    # format_source = None
    # for source_docs in res['source_documents']:
    #     if 'source' in source_docs.metadata:
    #         format_source = f"## File: {source_docs.metadata['source']}\n\n{source_docs.page_content}"
    #
    # return format_answer


def vote(data: gr.LikeData):
    if data.liked:
        gr.Info("You upvoted this response 😊")
    else:
        gr.Warning("You downvoted this response 👀")


vector_db = get_vector_db()
llm_model = get_llm_model(vector_db)

chat_interface_stream = gr.ChatInterface(
    predict,
    title="👀 Document answering bot",
    description="📚🔦 Upload some documents on the side and ask questions!",
    textbox=gr.Textbox(container=False, scale=7),
    chatbot=chatbot_stream,
    examples=["What is Data Caterer?", "Provide a set of potential questions and answers about the README"]
)

with gr.Blocks() as blocks:
    with gr.Row():
        with gr.Column(scale=1, min_width=100) as upload_col:
            gr.Interface(
                load_docs,
                title="📖 Upload documents",
                inputs=upload_files_section,
                outputs=gr.Files(),
                allow_flagging="never"
            )
            # upload_files_section.upload(load_docs, inputs=upload_files_section)
        with gr.Column(scale=4, min_width=600) as chat_col:
            chatbot_stream.like(vote, None, None)
            chat_interface_stream.render()

blocks.queue().launch()
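
Note on the streaming behaviour above: gr.ChatInterface treats a generator function as a stream, and each yielded value replaces the whole assistant message shown so far, which is why predict() yields the accumulated final_resp rather than individual deltas. A minimal sketch of that contract (the token list is illustrative only):

def predict_sketch(message, history):
    partial = ""
    for token in ["Data ", "Caterer ", "generates ", "test ", "data."]:
        partial += token
        yield partial  # each yield re-renders the full message so far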
llm_model.py
ADDED
@@ -0,0 +1,96 @@
import os

import requests
from huggingface_hub import InferenceClient
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
from langchain_core.vectorstores import VectorStoreRetriever


class LLMModel:
    base_model = "TheBloke/Llama-2-7B-GGUF"
    specific_model = "llama-2-7b.Q4_K_M.gguf"
    token_model = "meta-llama/Llama-2-7b-hf"
    llm_config = {'context_length': 2048, 'max_new_tokens': 1024, 'temperature': 0.3, 'top_p': 1.0}

    question_answer_system_prompt = """You are a helpful question answer assistant. Given the following context and a question, provide a set of potential questions and answers.
    Keep answers brief and well-structured. Do not give one word answers."""
    final_assistant_system_prompt = """You are a helpful assistant. Given the following list of relevant questions and answers, generate an answer based on this list only.
    Keep answers brief and well-structured. Do not give one word answers.
    If the answer is not found in the list, kindly state "I don't know.". Don't try to make up an answer."""
    template = """<s>[INST] <<SYS>>
    You are a question answer assistant. Given the following context and a question, generate an answer based on this context only.
    Keep answers brief and well-structured. Do not give one word answers.
    If the answer is not found in the context, kindly state "I don't know.". Don't try to make up an answer.
    <</SYS>>

    Context: {context}

    Question: Give me a step by step explanation of {question}[/INST]
    Answer:"""
    qa_chain_prompt = PromptTemplate.from_template(template)
    retriever = None

    hf_token = os.getenv('HF_TOKEN')
    api_url = os.getenv('API_URL')
    headers = {"Authorization": f"Bearer {hf_token}"}
    client = InferenceClient(api_url)

    # llm = CTransformers(model=base_model, model_file=specific_model, config=llm_config, hf=True)
    llm = None

    def __init__(self, retriever: VectorStoreRetriever):
        self.retriever = retriever

    def create_qa_chain(self):
        return RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": self.qa_chain_prompt},
        )

    def format_retrieved_docs(self, docs):
        all_docs = []
        for doc in docs:
            if "source" in doc.metadata:
                all_docs.append(f"""Document: {doc.metadata['source']}\nContent: {doc.page_content}\n\n""")
        return all_docs

    def format_query(self, question, context, system_prompt):
        prompt = f"""[INST] {system_prompt}

Context: {context}

Question: Give me a step by step explanation of {question}[/INST]"""
        return prompt

    def format_question(self, question):
        relevant_docs = self.retriever.get_relevant_documents(question)
        formatted_docs = self.format_retrieved_docs(relevant_docs)
        return self.format_query(question, formatted_docs, self.final_assistant_system_prompt)

    def get_potential_question_answer(self, document_chunk: str):
        prompt = self.format_query("potential questions and answers.", document_chunk, self.question_answer_system_prompt)
        return self.client.text_generation(prompt, max_new_tokens=512, temperature=0.4)

    def answer_question_inference_text_gen(self, question):
        prompt = self.format_question(question)
        return self.client.text_generation(prompt, max_new_tokens=512, temperature=0.4)

    def answer_question_inference(self, question):
        relevant_docs = self.retriever.get_relevant_documents(question)
        formatted_docs = "".join(self.format_retrieved_docs(relevant_docs))
        if not formatted_docs:
            return "No uploaded documents. Please try uploading a document on the left side."
        else:
            print(formatted_docs)
            return self.client.question_answering(question=question, context=formatted_docs)

    def answer_question_api(self, question):
        formatted_prompt = self.format_question(question)
        resp = requests.post(self.api_url, headers=self.headers, json={"inputs": formatted_prompt}, stream=True)
        for c in resp.iter_content():
            yield c
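
For context, a minimal sketch of how LLMModel is exercised outside the Gradio app, assuming the HF_TOKEN and API_URL environment variables point to a hosted text-generation endpoint as the class expects (the question string is illustrative only):

import vector_db as vdb
from llm_model import LLMModel

# Build the retriever the same way app.py does, then ask a question.
db = vdb.VectorDB()
model = LLMModel(retriever=db.docs_db.as_retriever(search_kwargs={"k": 2}))
print(model.answer_question_inference_text_gen("What is Data Caterer?"))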
requirements.txt
ADDED
@@ -0,0 +1,12 @@
tiktoken
faiss-cpu
ctransformers
transformers
sentence-transformers
streamlit
streamlit_lottie
gradio
huggingface_hub
langchain
langchain_experimental
llama-cpp-python
streamlit_app.py
ADDED
@@ -0,0 +1,158 @@
from io import StringIO

import streamlit as st
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
import time

import vector_db as vdb
from llm_model import LLMModel


def default_state():
    if "startup" not in st.session_state:
        st.session_state.startup = True

    if "messages" not in st.session_state:
        st.session_state.messages = []

    if "uploaded_docs" not in st.session_state:
        st.session_state.uploaded_docs = []

    if "llm_option" not in st.session_state:
        st.session_state.llm_option = "Local"

    if "answer_loading" not in st.session_state:
        st.session_state.answer_loading = False


def load_doc(file_name: str, file_content: str):
    if file_name is not None:
        # Create document with metadata
        doc = Document(page_content=file_content, metadata={"source": file_name})
        # Create an instance of the RecursiveCharacterTextSplitter class with specific parameters.
        # It splits text into chunks of 1000 characters each with a 150-character overlap.
        language = get_language(file_name)
        text_splitter = RecursiveCharacterTextSplitter.from_language(chunk_size=1000, chunk_overlap=150,
                                                                     language=language)
        # Split the text into chunks using the text splitter.
        docs = text_splitter.split_documents([doc])
        return docs
    else:
        return None


def get_language(file_name: str):
    if file_name.endswith(".md") or file_name.endswith(".mdx"):
        return Language.MARKDOWN
    elif file_name.endswith(".rst"):
        return Language.RST
    else:
        return Language.MARKDOWN


@st.cache_resource()
def get_vector_db():
    return vdb.VectorDB()


@st.cache_resource()
def get_llm_model(_db: vdb.VectorDB):
    retriever = _db.docs_db.as_retriever(search_kwargs={"k": 2})
    return LLMModel(retriever=retriever).create_qa_chain()


# Initialize an instance of the RetrievalQA class with the specified parameters
def init_sidebar():
    with st.sidebar:
        st.toggle(
            "Loading from LLM",
            on_change=enable_sidebar(),
            disabled=not st.session_state.answer_loading
        )
        llm_option = st.selectbox(
            'Select to use local model or inference API',
            options=['Local', 'Inference API']
        )
        st.session_state.llm_option = llm_option
        uploaded_files = st.file_uploader(
            'Upload file(s)',
            type=['md', 'mdx', 'rst', 'txt'],
            accept_multiple_files=True
        )
        for uploaded_file in uploaded_files:
            if uploaded_file.name not in st.session_state.uploaded_docs:
                # Read the file as a string
                stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
                string_data = stringio.read()
                # Get chunks of text
                doc_chunks = load_doc(uploaded_file.name, string_data)
                st.write(f"Number of chunks={len(doc_chunks)}")
                vector_db.load_docs_into_vector_db(doc_chunks)
                st.session_state.uploaded_docs.append(uploaded_file.name)


def init_chat():
    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])


def disable_sidebar():
    st.session_state.answer_loading = True
    st.rerun()


def enable_sidebar():
    st.session_state.answer_loading = False


st.set_page_config(page_title="Document Answering Tool", page_icon=":book:")
vector_db = get_vector_db()
default_state()
init_sidebar()
st.header("Document answering tool")
st.subheader("Upload your documents on the side and ask questions")
init_chat()
llm_model = get_llm_model(vector_db)
st.session_state.startup = False


# React to user input
if user_prompt := st.chat_input("What's up?", on_submit=disable_sidebar()):
    # if st.session_state.answer_loading:
    #     st.warning("Cannot ask multiple questions at the same time")
    #     st.session_state.answer_loading = False
    # else:
    start_time = time.time()
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(user_prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": user_prompt})

    if llm_model is not None:
        assistant_chat = st.chat_message("assistant")
        if not st.session_state.uploaded_docs:
            assistant_chat.warning("WARN: Will try to answer the question without documents")
        with st.spinner('Resolving question...'):
            res = llm_model({"query": user_prompt})
            sources = []
            for source_docs in res['source_documents']:
                if 'source' in source_docs.metadata:
                    sources.append(source_docs.metadata['source'])
            # Display assistant response in chat message container
            end_time = time.time()
            time_taken = "{:.2f}".format(end_time - start_time)
            format_answer = f"## Result\n\n{res['result']}\n\n### Sources\n\n{sources}\n\nTime taken: {time_taken}s"
            assistant_chat.markdown(format_answer)
            source_expander = assistant_chat.expander("See full sources")
            for source_docs in res['source_documents']:
                if 'source' in source_docs.metadata:
                    format_source = f"## File: {source_docs.metadata['source']}\n\n{source_docs.page_content}"
                    source_expander.markdown(format_source)
            # Add assistant response to chat history
            st.session_state.messages.append({"role": "assistant", "content": format_answer})
    enable_sidebar()
    st.rerun()
vector_db.py
ADDED
@@ -0,0 +1,46 @@
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS


class VectorDB:
    embedding_model = "sentence-transformers/all-MiniLM-l6-v2"
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    local_folder = "db/faiss_db"
    is_load_local = False
    text_embeddings = None
    docs_db = None

    def __init__(self):
        self.text_embeddings = self.init_text_embeddings(self.embedding_model, self.model_kwargs, self.encode_kwargs)
        self.docs_db = self.init_vector_db(self.local_folder, self.text_embeddings)

    def init_text_embeddings(self, embedding_model: str, model_kwargs: dict, encode_kwargs: dict):
        return HuggingFaceEmbeddings(
            model_name=embedding_model,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )

    def init_vector_db(self, folder_path: str, text_embeddings: HuggingFaceEmbeddings):
        if self.is_load_local:
            try:
                return FAISS.load_local(folder_path=folder_path, embeddings=text_embeddings)
            except Exception:
                return FAISS.from_documents([Document(page_content="")], embedding=text_embeddings)
        else:
            return FAISS.from_documents([Document(page_content="")], embedding=text_embeddings)

    def load_docs_into_vector_db(self, doc_chunks: list):
        if len(doc_chunks) != 0:
            if self.docs_db is None:
                self.docs_db = FAISS.from_documents(doc_chunks, embedding=self.text_embeddings)
            else:
                self.docs_db.add_documents(doc_chunks)

    def save_vector_db(self):
        if self.docs_db is not None and not self.is_load_local:
            self.docs_db.save_local(self.local_folder)
        else:
            print("No vector db to save.")
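
As a usage reference, a minimal sketch of splitting a document and loading it into VectorDB, mirroring load_docs() in app.py (the file name and content are illustrative only):

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
import vector_db as vdb

db = vdb.VectorDB()
doc = Document(page_content="# Data Caterer\nData Caterer generates test data.", metadata={"source": "README.md"})
splitter = RecursiveCharacterTextSplitter.from_language(
    chunk_size=2000, chunk_overlap=200, language=Language.MARKDOWN
)
db.load_docs_into_vector_db(splitter.split_documents([doc]))
db.save_vector_db()  # writes the FAISS index to db/faiss_db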