Spaces:

ArturG9
/

Local_Lithuanian_Law_RAG_QA_ChatBot_Streamlit

Sleeping

ArturG9 committed on Jul 13

Commit

5d24867

•

1 Parent(s): a049857

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,23 +45,28 @@ def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='m
     # Check if vectorstore exists
-    #if os.path.exists(vectorstore_path) and os.listdir(vectorstore_path):
         # Load the existing vectorstore
-    #    vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
-    #else:
         # Load documents from the specified data path
-    loader = DirectoryLoader('./data/', glob="./*.txt", loader_cls=TextLoader)
-    docs = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap,separators=["\n \n \n", "\n \n", "\n1" , "(?<=\. )", " ", ""])
-    split_docs = text_splitter.split_documents(docs)
         # Create the vectorstore
-    vectorstore = Chroma.from_documents(
             documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
         )
     retriever=vectorstore.as_retriever(search_type = search_type, search_kwargs={"k": k})

     # Check if vectorstore exists
+    if os.path.exists(vectorstore_path) and os.listdir(vectorstore_path):
         # Load the existing vectorstore
+        st.write("Vector store exists and is loaded")
+        vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
+    else:
         # Load documents from the specified data path
+        st.write("Vector store doesnt exist and will be created now")
+        loader = DirectoryLoader('./data/', glob="./*.txt", loader_cls=TextLoader)
+        docs = loader.load()
+        st.write("Docs loaded")
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap,separators=["\n \n \n", "\n \n", "\n1" , "(?<=\. )", " ", ""])
+        split_docs = text_splitter.split_documents(docs)
         # Create the vectorstore
+        vectorstore = Chroma.from_documents(
             documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
         )
+        st.write("VectorStore is created")
     retriever=vectorstore.as_retriever(search_type = search_type, search_kwargs={"k": k})