Tonic committed on
Commit
930288d
1 Parent(s): a65fdff

improve pass metadata

Browse files
Files changed (1) hide show
  1. app.py +18 -8
app.py CHANGED
@@ -112,10 +112,13 @@ class MyEmbeddingFunction(EmbeddingFunction):
112
  def __init__(self, embedding_generator: EmbeddingGenerator):
113
  self.embedding_generator = embedding_generator
114
 
115
- def __call__(self, input: Documents) -> Embeddings:
116
- embeddings = [self.embedding_generator.compute_embeddings(doc) for doc in input]
117
- embeddings = [item for sublist in embeddings for item in sublist]
118
- return embeddings
 
 
 
119
 
120
  def load_documents(file_path: str, mode: str = "elements"):
121
  loader = UnstructuredFileLoader(file_path, mode=mode)
@@ -130,8 +133,15 @@ def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunct
130
 
131
  def add_documents_to_chroma(client, collection, documents: list, embedding_function: MyEmbeddingFunction):
132
  for doc in documents:
133
- collection.add(ids=[str(uuid.uuid1())], documents=[doc], embeddings=embedding_function([doc]))
134
-
 
 
 
 
 
 
 
135
  def query_chroma(client, collection_name: str, query_text: str, embedding_function: MyEmbeddingFunction):
136
  db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
137
  result_docs = db.similarity_search(query_text)
@@ -177,8 +187,8 @@ def upload_documents(files):
177
  for file in files:
178
  loader = UnstructuredFileLoader(file.name)
179
  documents = loader.load_documents()
180
- add_documents_to_chroma(documents)
181
- return "Documents uploaded and processed successfully!"
182
 
183
  def query_documents(query):
184
  results = query_chroma(query)
 
112
  def __init__(self, embedding_generator: EmbeddingGenerator):
113
  self.embedding_generator = embedding_generator
114
 
115
+ def __call__(self, input: Documents) -> (Embeddings, list):
116
+ embeddings_with_metadata = [self.embedding_generator.compute_embeddings(doc) for doc in input]
117
+ embeddings = [item[0] for item in embeddings_with_metadata]
118
+ metadata = [item[1] for item in embeddings_with_metadata]
119
+ embeddings_flattened = [emb for sublist in embeddings for emb in sublist]
120
+ metadata_flattened = [meta for sublist in metadata for meta in sublist]
121
+ return embeddings_flattened, metadata_flattened
122
 
123
  def load_documents(file_path: str, mode: str = "elements"):
124
  loader = UnstructuredFileLoader(file_path, mode=mode)
 
133
 
134
  def add_documents_to_chroma(client, collection, documents: list, embedding_function: MyEmbeddingFunction):
135
  for doc in documents:
136
+ embeddings, metadata = embedding_function.embedding_generator.compute_embeddings(doc)
137
+ for embedding, meta in zip(embeddings, metadata):
138
+ collection.add(
139
+ ids=[str(uuid.uuid1())],
140
+ documents=[doc],
141
+ embeddings=[embedding],
142
+ metadatas=[meta]
143
+ )
144
+
145
  def query_chroma(client, collection_name: str, query_text: str, embedding_function: MyEmbeddingFunction):
146
  db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
147
  result_docs = db.similarity_search(query_text)
 
187
  for file in files:
188
  loader = UnstructuredFileLoader(file.name)
189
  documents = loader.load_documents()
190
+ add_documents_to_chroma(chroma_client, chroma_collection, documents, embedding_function)
191
+ return "Documents uploaded and processed successfully!"
192
 
193
  def query_documents(query):
194
  results = query_chroma(query)