Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ os.environ["LANGCHAIN_PROJECT"] = "Chat with multiple PDFs"
|
|
32 |
|
33 |
|
34 |
def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='mmr', k=7, chunk_size=250, chunk_overlap=20):
|
35 |
-
|
36 |
model_name = "Alibaba-NLP/gte-base-en-v1.5"
|
37 |
model_kwargs = {'device': 'cpu',
|
38 |
"trust_remote_code" : 'True'}
|
@@ -65,16 +65,10 @@ def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='m
|
|
65 |
vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
|
66 |
else:
|
67 |
# Load documents from the specified data path
|
68 |
-
|
69 |
-
|
70 |
-
if filename.endswith('.txt'):
|
71 |
-
file_path = os.path.join(data_path, filename)
|
72 |
-
loaded_docs = TextLoader(file_path).load()
|
73 |
-
documents.extend(loaded_docs)
|
74 |
-
|
75 |
-
# Split documents into chunks
|
76 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
77 |
-
split_docs = text_splitter.split_documents(
|
78 |
|
79 |
|
80 |
|
|
|
32 |
|
33 |
|
34 |
def create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type='mmr', k=7, chunk_size=250, chunk_overlap=20):
|
35 |
+
|
36 |
model_name = "Alibaba-NLP/gte-base-en-v1.5"
|
37 |
model_kwargs = {'device': 'cpu',
|
38 |
"trust_remote_code" : 'True'}
|
|
|
65 |
vectorstore = Chroma(persist_directory=vectorstore_path,embedding_function=embeddings)
|
66 |
else:
|
67 |
# Load documents from the specified data path
|
68 |
+
loader = DirectoryLoader('./data', glob="./*.txt", loader_cls=TextLoader)
|
69 |
+
docs = loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
71 |
+
split_docs = text_splitter.split_documents(docs)
|
72 |
|
73 |
|
74 |
|