ArturG9 committed on
Commit
305c673
1 Parent(s): 1d9fbcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -10
app.py CHANGED
@@ -32,7 +32,7 @@ os.environ["LANGCHAIN_PROJECT"] = "Chat with multiple PDFs"
32
 
33
 
34
  def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='mmr', k=7, chunk_size=250, chunk_overlap=20):
35
- data_path = "data"
36
  model_name = "Alibaba-NLP/gte-base-en-v1.5"
37
  model_kwargs = {'device': 'cpu',
38
  "trust_remote_code" : 'True'}
@@ -65,16 +65,10 @@ def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='m
65
  vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
66
  else:
67
  # Load documents from the specified data path
68
- documents = []
69
- for filename in os.listdir(data_path):
70
- if filename.endswith('.txt'):
71
- file_path = os.path.join(data_path, filename)
72
- loaded_docs = TextLoader(file_path).load()
73
- documents.extend(loaded_docs)
74
-
75
- # Split documents into chunks
76
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
77
- split_docs = text_splitter.split_documents(documents)
78
 
79
 
80
 
 
32
 
33
 
34
  def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='mmr', k=7, chunk_size=250, chunk_overlap=20):
35
+
36
  model_name = "Alibaba-NLP/gte-base-en-v1.5"
37
  model_kwargs = {'device': 'cpu',
38
  "trust_remote_code" : 'True'}
 
65
  vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
66
  else:
67
  # Load documents from the specified data path
68
+ loader = DirectoryLoader('./data', glob="./*.txt", loader_cls=TextLoader)
69
+ docs = loader.load()
 
 
 
 
 
 
70
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
71
+ split_docs = text_splitter.split_documents(docs)
72
 
73
 
74