XThomasBU committed on
Commit
49140fa
1 Parent(s): 4265034

reverted simplistic check for informative changes

Browse files
code/modules/dataloader/data_loader.py CHANGED
@@ -223,8 +223,8 @@ class ChunkProcessor:
223
  file_metadata = {}
224
 
225
  for doc in documents:
226
- if len(doc.page_content) <= 400:
227
- continue
228
 
229
  page_num = doc.metadata.get("page", 0)
230
  file_data[page_num] = doc.page_content
 
223
  file_metadata = {}
224
 
225
  for doc in documents:
226
+ # if len(doc.page_content) <= 400: # better approach to filter out non-informative documents
227
+ # continue
228
 
229
  page_num = doc.metadata.get("page", 0)
230
  file_data[page_num] = doc.page_content