Spaces:

ArturG9
/

Local_Lithuanian_Law_RAG_QA_ChatBot_Streamlit

Sleeping

ArturG9 committed on Jul 12

Commit

305c673

•

1 Parent(s): 1d9fbcf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ os.environ["LANGCHAIN_PROJECT"] = "Chat with multiple PDFs"
 def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='mmr', k=7, chunk_size=250, chunk_overlap=20):
-    data_path = "data"
     model_name = "Alibaba-NLP/gte-base-en-v1.5"
     model_kwargs = {'device': 'cpu',
                    "trust_remote_code" : 'True'}
@@ -65,16 +65,10 @@ def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='m
         vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
     else:
         # Load documents from the specified data path
-        documents = []
-        for filename in os.listdir(data_path):
-            if filename.endswith('.txt'):
-                file_path = os.path.join(data_path, filename)
-                loaded_docs = TextLoader(file_path).load()
-                documents.extend(loaded_docs)
-        # Split documents into chunks
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-        split_docs = text_splitter.split_documents(documents)

 def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='mmr', k=7, chunk_size=250, chunk_overlap=20):
     model_name = "Alibaba-NLP/gte-base-en-v1.5"
     model_kwargs = {'device': 'cpu',
                    "trust_remote_code" : 'True'}
         vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
     else:
         # Load documents from the specified data path
+        loader = DirectoryLoader('./data', glob="./*.txt", loader_cls=TextLoader)
+        docs = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+        split_docs = text_splitter.split_documents(docs)