Spaces:

tdecae
/

chatbot

Running

App Files Files Community

tdecae committed on Oct 14, 2023

Commit

5ddd792

•

1 Parent(s): 553cbf6

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -30

app.py CHANGED Viewed

@@ -8,47 +8,74 @@ from langchain.embeddings import OpenAIEmbeddings
 from langchain.indexes import VectorstoreIndexCreator
 from langchain.indexes.vectorstore import VectorStoreIndexWrapper
 from langchain.llms import OpenAI
 __import__('pysqlite3')
 import sys
 sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 from langchain.vectorstores import Chroma
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
-# Enable to save to disk & reuse the model (for repeated queries on the same data)
-PERSIST = False
-query = None
-if len(sys.argv) > 1:
-  query = sys.argv[1]
-if PERSIST and os.path.exists("persist"):
-  print("Reusing index...\n")
-  vectorstore = Chroma(persist_directory="persist", embedding_function=OpenAIEmbeddings())
-  index = VectorStoreIndexWrapper(vectorstore=vectorstore)
-else:
-  loader = TextLoader("input/input_data.txt") # Use this line if you only need data.txt
-  # loader = DirectoryLoader("data/")
-  if PERSIST:
-    index = VectorstoreIndexCreator(vectorstore_kwargs={"persist_directory":"persist"}).from_loaders([loader])
-  else:
-    index = VectorstoreIndexCreator().from_loaders([loader])
 chain = ConversationalRetrievalChain.from_llm(
-  llm=ChatOpenAI(model="gpt-3.5-turbo"),
-  retriever=index.vectorstore.as_retriever(search_kwargs={"k": 1}),
 )
 chat_history = []
-while True:
-  if not query:
-    query = input("Prompt: ")
-  if query in ['quit', 'q', 'exit']:
-    sys.exit()
-  result = chain({"question": query, "chat_history": chat_history})
-  print(result['answer'])
-  chat_history.append((query, result['answer']))
-  query = None

 from langchain.indexes import VectorstoreIndexCreator
 from langchain.indexes.vectorstore import VectorStoreIndexWrapper
 from langchain.llms import OpenAI
+from langchain.text_splitter import CharacterTextSplitter
 __import__('pysqlite3')
 import sys
 sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 from langchain.vectorstores import Chroma
+import gradio as gr
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
+docs = []
+for f in os.listdir("./"):
+    if f.endswith(".pdf"):
+        pdf_path = "./" + f
+        loader = PyPDFLoader(pdf_path)
+        docs.extend(loader.load())
+    elif f.endswith('.docx') or f.endswith('.doc'):
+        doc_path = "./" + f
+        loader = Docx2txtLoader(doc_path)
+        docs.extend(loader.load())
+    elif f.endswith('.txt'):
+        text_path = "./" + f
+        loader = TextLoader(text_path)
+        docs.extend(loader.load())
+splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
+docs = splitter.split_documents(docs)
+# Convert the document chunks to embedding and save them to the vector store
+vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data")
+vectorstore.persist()
 chain = ConversationalRetrievalChain.from_llm(  # question-answering chain that is later called with "question" and "chat_history"
+    ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),  # chat model that generates the answers
+    retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),  # retriever over the Chroma store; k=6 is passed through as a search kwarg
+    return_source_documents=True,  # ask the chain to also return the documents it retrieved
+    verbose=False
 )
 chat_history = []  # NOTE(review): not read by the visible code - the `user` callback receives its own `chat_history` argument
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment relaged questions such as my previous experience, where i'm eligible to work, when I can start work, my most recent experience, what NLP skills I have, and much more!")],avatar_images=["./input/avatar/Guest.jpg","./input/avatar/Thierry Picture.jpg"])
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+    chat_history = []
+    def user(query, chat_history):
+        # print("User query:", query)
+        # print("Chat history:", chat_history)
+        # Convert chat history to list of tuples
+        chat_history_tuples = []
+        for message in chat_history:
+            chat_history_tuples.append((message[0], message[1]))
+        # Get result from QA chain
+        result = chain({"question": query, "chat_history": chat_history_tuples})
+        # Append user message and response to chat history
+        chat_history.append((query, result["answer"]))
+        # print("Updated chat history:", chat_history)
+        return gr.update(value=""), chat_history
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
+    clear.click(lambda: None, None, chatbot, queue=False)
+demo.launch(debug=True)