Liam Dyer committed on
Commit
bab324c
1 Parent(s): ec76910

idk if i could do this less efficiently

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -153,7 +153,10 @@ def predict(queries, documents, max_characters) -> list[list[str]]:
153
  # Getting a structure like [[chunk, ...]]
154
  document_embeddings = [[] for _ in range(len(documents))]
155
  total_chars = 0
156
- while total_chars < max_characters:
 
 
 
157
  for query, doc_scores in query_embeddings.items():
158
  if len(doc_scores) == 0:
159
  continue
@@ -176,6 +179,12 @@ def predict(queries, documents, max_characters) -> list[list[str]]:
176
  document_embeddings[doc_idx].append(chunk_idx)
177
  total_chars += len(chunk)
178
 
 
 
 
 
 
 
179
  return document_embeddings
180
 
181
 
 
153
  # Getting a structure like [[chunk, ...]]
154
  document_embeddings = [[] for _ in range(len(documents))]
155
  total_chars = 0
156
+ while (
157
+ total_chars < max_characters
158
+ and sum([len(x) for x in query_embeddings.values()]) > 0
159
+ ):
160
  for query, doc_scores in query_embeddings.items():
161
  if len(doc_scores) == 0:
162
  continue
 
179
  document_embeddings[doc_idx].append(chunk_idx)
180
  total_chars += len(chunk)
181
 
182
+ # Get the actual text for the chunks
183
+ document_embeddings = [
184
+ [chunked_docs[doc_idx][chunk_idx] for chunk_idx in chunks]
185
+ for doc_idx, chunks in enumerate(document_embeddings)
186
+ ]
187
+
188
  return document_embeddings
189
 
190