Files changed (6)
  1. main.py +19 -14
  2. poetry.lock +0 -0
  3. pyproject.toml +25 -0
  4. requirements.txt +2 -2
  5. static/chatbot.js +1 -1
  6. utils.py +219 -20
main.py CHANGED
@@ -1,22 +1,27 @@
- from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
- from langchain.embeddings.openai import OpenAIEmbeddings
- from langchain.llms import OpenAI
-
- from utils import generate_answer
- from utils import get_search_index
-
- open_ai_pkl = "open_ai.pkl"
- open_ai_index = "open_ai.index"
-
- gpt_3_5 = OpenAI(model_name='gpt-3.5-turbo', temperature=0)
-
- open_ai_embeddings = OpenAIEmbeddings()
-
- def run(question):
-
-     gpt_3_5_index = get_search_index(open_ai_pkl, open_ai_index, open_ai_embeddings)
-
-     gpt_3_5_chain = load_qa_with_sources_chain(gpt_3_5, chain_type="stuff", verbose=True)
-
-     answer = generate_answer(gpt_3_5_chain, gpt_3_5_index, question)
-     return answer
+ from utils import create_index, get_agent_chain, get_prompt_and_tools, get_search_index
+ from utils import get_custom_agent, get_prompt_and_tools_for_custom_agent
+ question_starters = ['who', 'why', 'what', 'how', 'where', 'when', 'which', 'whom', 'whose']
+
+ def run(question):
+
+     index = get_search_index()
+
+     # prompt, tools = get_prompt_and_tools()
+
+     # agent_chain = get_agent_chain(prompt, tools)
+
+     prompt, tools = get_prompt_and_tools_for_custom_agent()
+
+     agent_chain = get_custom_agent(prompt, tools)
+
+     result = None
+
+     try:
+         result = agent_chain.run(question)
+         print(result)
+     except ValueError as ve:
+         # Retry once with a trailing '?' when parsing fails and the input looks like a question
+         if "Could not parse LLM output:" in ve.args[0] and question.lower().startswith(tuple(question_starters)) and not question.lower().endswith('?'):
+             question = question + '?'
+             result = agent_chain.run(question)
+
+     return result
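
A minimal sketch of how the reworked entry point is exercised (hypothetical question; assumes OPENAI_API_KEY is set, and the actual caller, e.g. a Flask-SocketIO handler, is outside this diff):

from main import run

# Question-shaped inputs that fail to parse on the first pass are retried
# once with a trailing '?', per the ValueError handler above.
answer = run("what filament do the MakerLab printers use")
print(answer)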
 
poetry.lock ADDED
(diff too large to render)
 
pyproject.toml ADDED
@@ -0,0 +1,25 @@
+ [tool.poetry]
+ name = "makerlab-bot"
+ version = "0.1.0"
+ description = "Assistant Bot to Makerlab"
+ authors = ["rohan-uiuc <rohan13@illinois.edu>"]
+ readme = "README.md"
+ packages = [{include = "makerlab_bot"}]
+
+ [tool.poetry.dependencies]
+ python = "^3.9"
+ faiss-cpu = "^1.7.3"
+ langchain = "^0.0.131"
+ beautifulsoup4 = "^4.12.0"
+ pypdf2 = "^3.0.1"
+ openai = "^0.27.4"
+ flask = "^2.2.3"
+ flask-socketio = "^5.3.3"
+ flask-cors = "^3.0.10"
+ gevent = "^22.10.2"
+ gevent-websocket = "^0.10.1"
+
+
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
requirements.txt CHANGED
@@ -1,5 +1,5 @@
- faiss-cpu==1.7.3
- langchain==0.0.117
+ faiss-cpu
+ langchain
  beautifulsoup4
  PyPDF2
  openai
static/chatbot.js CHANGED
@@ -23,7 +23,7 @@ $(document).ready(function() {
      // Function to display message
      function displayMessage(message, isUser) {
          var $message = $('<div>').addClass('chat-message round');
-         var $messageText = $('<p>').html(message.replace(/(https?:\/\/[^\s]+)/g, '<a href="$1">$1</a>'));
+         var $messageText = $('<p>').html(message.replace(/(https?:\/\/[^\s,]+)/g, '<a href="$1">$1</a>'));

          $message.append($messageText);
          if (isUser) {
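
This one-character regex change matters because utils.py now joins multiple sources with ',\n' before appending them to the answer: under the old pattern [^\s]+, the trailing comma was swallowed into the href and broke the link. A sketch of the same substitution in Python (illustrative message string; the production code is the jQuery above):

import re

msg = "SOURCES: https://makerlab.illinois.edu/,\nhttps://g.co/kgs/2VFC7u"
# Excluding ',' stops each match at the separator, so both URLs linkify cleanly.
print(re.sub(r'(https?://[^\s,]+)', r'<a href="\1">\1</a>', msg))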
utils.py CHANGED
@@ -1,36 +1,111 @@
  import os
  import pickle
+ import re
  import time
+ from typing import List, Union
  from urllib.parse import urlparse, urljoin

  import faiss
  import requests
  from PyPDF2 import PdfReader
  from bs4 import BeautifulSoup
+ from langchain import OpenAI, LLMChain
+ from langchain.agents import ConversationalAgent
+ from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
+ from langchain.prompts import BaseChatPromptTemplate
+ from langchain.chains import ConversationalRetrievalChain
  from langchain.docstore.document import Document
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.memory import ConversationBufferWindowMemory
+ from langchain.schema import AgentAction, AgentFinish, HumanMessage
  from langchain.text_splitter import CharacterTextSplitter
  from langchain.vectorstores.faiss import FAISS

  book_url = 'https://g.co/kgs/2VFC7u'
  book_file = "Book.pdf"
  url = 'https://makerlab.illinois.edu/'
- def get_search_index(pickle_file, index_file, embeddings):
+
+ pickle_file = "open_ai.pkl"
+ index_file = "open_ai.index"
+
+ gpt_3_5 = OpenAI(model_name='gpt-3.5-turbo', temperature=0)
+
+ embeddings = OpenAIEmbeddings()
+
+ chat_history = []
+
+ memory = ConversationBufferWindowMemory(memory_key="chat_history")
+
+ gpt_3_5_index = None
+
+ class CustomOutputParser(AgentOutputParser):
+
+     def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
+         # Check if agent replied without using tools
+         if "AI:" in llm_output:
+             return AgentFinish(return_values={"output": llm_output.split("AI:")[-1].strip()},
+                                log=llm_output)
+         # Check if agent should finish
+         if "Final Answer:" in llm_output:
+             return AgentFinish(
+                 # Return values is generally always a dictionary with a single `output` key
+                 # It is not recommended to try anything else at the moment :)
+                 return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
+                 log=llm_output,
+             )
+         # Parse out the action and action input
+         regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
+         match = re.search(regex, llm_output, re.DOTALL)
+         if not match:
+             raise ValueError(f"Could not parse LLM output: `{llm_output}`")
+         action = match.group(1).strip()
+         action_input = match.group(2)
+         # Return the action and action input
+         return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
+
+ # Set up a prompt template
+ class CustomPromptTemplate(BaseChatPromptTemplate):
+     # The template to use
+     template: str
+     # The list of tools available
+     tools: List[Tool]
+
+     def format_messages(self, **kwargs) -> str:
+         # Get the intermediate steps (AgentAction, Observation tuples)
+         # Format them in a particular way
+         intermediate_steps = kwargs.pop("intermediate_steps")
+         thoughts = ""
+         for action, observation in intermediate_steps:
+             thoughts += action.log
+             thoughts += f"\nObservation: {observation}\nThought: "
+         # Set the agent_scratchpad variable to that value
+         kwargs["agent_scratchpad"] = thoughts
+         # Create a tools variable from the list of tools provided
+         kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
+         # Create a list of tool names for the tools provided
+         kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
+         formatted = self.template.format(**kwargs)
+         return [HumanMessage(content=formatted)]
+
+ def get_search_index():
+     global gpt_3_5_index
      if os.path.isfile(pickle_file) and os.path.isfile(index_file) and os.path.getsize(pickle_file) > 0:
          # Load index from pickle file
          with open(pickle_file, "rb") as f:
              search_index = pickle.load(f)
      else:
-         source_chunks = create_chunk_documents()
-         search_index = search_index_from_docs(source_chunks, embeddings=embeddings)
-         faiss.write_index(search_index.index, index_file)
-
-         # Save index to pickle file
-         with open(pickle_file, "wb") as f:
-             pickle.dump(search_index, f)
+         search_index = create_index()
+
+     gpt_3_5_index = search_index
+
+
+ def create_index():
+     source_chunks = create_chunk_documents()
+     search_index = search_index_from_docs(source_chunks)
+     faiss.write_index(search_index.index, index_file)
+     # Save index to pickle file
+     with open(pickle_file, "wb") as f:
+         pickle.dump(search_index, f)
      return search_index
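
For reference, the three shapes of LLM output that CustomOutputParser distinguishes (made-up examples, not captured model output):

from utils import CustomOutputParser

parser = CustomOutputParser()

# Direct reply, no tool use -> AgentFinish
parser.parse("Thought: Do I need to use a tool? No\nAI: We are open 9am-5pm.")

# Tool invocation -> AgentAction routed to the Vectorstore tool
parser.parse('Action: Vectorstore\nAction Input: "3D printing cost"')

# Anything else raises ValueError("Could not parse LLM output: ..."),
# which main.run() catches and retries with a trailing '?'.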
@@ -118,19 +193,143 @@ def get_document_data(book_file, book_url):
      # print("document list" + str(len(document_list)))
      return document_list

- def search_index_from_docs(source_chunks, embeddings):
+ def search_index_from_docs(source_chunks):
      # Create index from chunk documents
      # print("Size of chunk" + str(len(source_chunks)))
      search_index = FAISS.from_texts([doc.page_content for doc in source_chunks], embeddings, metadatas=[doc.metadata for doc in source_chunks])
      return search_index
- def generate_answer(chain, index, question):
-     #Get answer
-     answer = chain(
-         {
-             "input_documents": index.similarity_search(question, k=4),
-             "question": question,
-         },
-         return_only_outputs=True,
-     )["output_text"]
-
-     return answer
+
+
+ def get_qa_chain(gpt_3_5_index):
+     global gpt_3_5
+     print("index: " + str(gpt_3_5_index))
+     return ConversationalRetrievalChain.from_llm(gpt_3_5, chain_type="stuff", get_chat_history=get_chat_history,
+                                                  retriever=gpt_3_5_index.as_retriever(), return_source_documents=True, verbose=True)
+
+ def get_chat_history(inputs) -> str:
+     res = []
+     for human, ai in inputs:
+         res.append(f"Human:{human}\nAI:{ai}")
+     return "\n".join(res)
+
+
+ def generate_answer(question) -> str:
+     global chat_history, gpt_3_5_index
+     gpt_3_5_chain = get_qa_chain(gpt_3_5_index)
+     result = gpt_3_5_chain(
+         {"question": question, "chat_history": chat_history, "vectordbkwargs": {"search_distance": 0.8}})
+     print("Result: " + str(result))
+     chat_history = [(question, result["answer"])]
+     sources = []
+
+     for document in result['source_documents']:
+         source = document.metadata['source']
+         sources.append(source)
+
+     source = ',\n'.join(set(sources))
+     return result['answer'] + '\nSOURCES: ' + source
+
+
+ def get_agent_chain(prompt, tools):
+     global gpt_3_5
+     # output_parser = CustomOutputParser()
+     llm_chain = LLMChain(llm=gpt_3_5, prompt=prompt)
+     agent = ConversationalAgent(llm_chain=llm_chain, tools=tools, verbose=True)
+     agent_chain = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory,
+                                                      intermediate_steps=True)
+     return agent_chain
+
+
+ def get_prompt_and_tools():
+     tools = get_tools()
+
+     prefix = """Have a conversation with a human, answering the following questions as best you can.
+     Always try to use Vectorstore first.
+     Your name is Makerlab Bot because you are a personal assistant of Makerlab. You have access to the following tools:"""
+     suffix = """Begin! If you use any tool, ALWAYS return a "SOURCES" part in your answer"
+
+     {chat_history}
+     Question: {input}
+     {agent_scratchpad}
+     SOURCES:"""
+     prompt = ConversationalAgent.create_prompt(
+         tools,
+         prefix=prefix,
+         suffix=suffix,
+         input_variables=["input", "chat_history", "agent_scratchpad"]
+     )
+     # print("Template: " + prompt.template)
+     return prompt, tools
+
+
+ def get_tools():
+     tools = [
+         Tool(
+             name="Vectorstore",
+             func=generate_answer,
+             description="useful for when you need to answer questions about the Makerlab or 3D Printing.",
+             return_direct=True
+         )]
+     return tools
+
+ def get_custom_agent(prompt, tools):
+
+     llm_chain = LLMChain(llm=gpt_3_5, prompt=prompt)
+
+     output_parser = CustomOutputParser()
+     tool_names = [tool.name for tool in tools]
+     agent = LLMSingleActionAgent(
+         llm_chain=llm_chain,
+         output_parser=output_parser,
+         stop=["\nObservation:"],
+         allowed_tools=tool_names
+     )
+     agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory,
+                                                         intermediate_steps=True)
+     return agent_executor
+
+ def get_prompt_and_tools_for_custom_agent():
+     template = """
+ Have a conversation with a human, answering the following questions as best you can.
+ Always try to use Vectorstore first.
+ Your name is Makerlab Bot because you are a personal assistant of Makerlab. You have access to the following tools:
+
+ {tools}
+
+ To answer for the new input, use the following format:
+
+ New Input: the input question you must answer
+ Thought: Do I need to use a tool? Yes
+ Action: the action to take, should be one of [{tool_names}]
+ Action Input: the input to the action
+ Observation: the result of the action
+ ... (this Thought/Action/Action Input/Observation can repeat N times)
+ Thought: I now know the final answer
+ Final Answer: the final answer to the original input question. SOURCES: the sources referred to find the final answer
+
+
+ When you have a response to say to the Human and DO NOT need to use a tool:
+ 1. DO NOT return "SOURCES" if you did not use any tool.
+ 2. You MUST use this format:
+ ```
+ Thought: Do I need to use a tool? No
+ AI: [your response here]
+ ```
+
+ Begin! Remember to speak as a personal assistant when giving your final answer.
+ ALWAYS return a "SOURCES" part in your answer, if you used any tool.
+
+ Previous conversation history:
+ {chat_history}
+ New input: {input}
+ {agent_scratchpad}
+ SOURCES:"""
+     tools = get_tools()
+     prompt = CustomPromptTemplate(
+         template=template,
+         tools=tools,
+         # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
+         # This includes the `intermediate_steps` variable because that is needed
+         input_variables=["input", "intermediate_steps", "chat_history"]
+     )
+     return prompt, tools
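
Taken together, a minimal sketch of how the new utils pieces compose outside Flask (mirrors main.run(); assumes OPENAI_API_KEY is set and the index can be built or loaded from open_ai.pkl / open_ai.index):

from utils import get_custom_agent, get_prompt_and_tools_for_custom_agent, get_search_index

get_search_index()  # loads or builds the FAISS index and sets the module-level gpt_3_5_index

prompt, tools = get_prompt_and_tools_for_custom_agent()
agent = get_custom_agent(prompt, tools)

# generate_answer() runs via the Vectorstore tool (return_direct=True) and
# appends 'SOURCES: ...', which static/chatbot.js then linkifies.
print(agent.run("Who can use the MakerLab?"))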