XThomasBU committed on
Commit
5cd7fa4
1 Parent(s): 34aaae9

remove hardcoded

Browse files
README.md CHANGED
@@ -37,7 +37,7 @@ Please visit [setup](https://dl4ds.github.io/dl4ds_tutor/guide/setup/) for more
37
  3. **To test Data Loading (Optional)**
38
  ```bash
39
  cd code
40
- python -m modules.dataloader.data_loader
41
  ```
42
 
43
  4. **Create the Vector Database**
 
37
  3. **To test Data Loading (Optional)**
38
  ```bash
39
  cd code
40
+ python -m modules.dataloader.data_loader --links "your_pdf_link"
41
  ```
42
 
43
  4. **Create the Vector Database**
code/modules/dataloader/data_loader.py CHANGED
@@ -417,6 +417,18 @@ class DataLoader:
417
 
418
  if __name__ == "__main__":
419
  import yaml
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
  logger = logging.getLogger(__name__)
422
  logger.setLevel(logging.INFO)
@@ -445,9 +457,7 @@ if __name__ == "__main__":
445
  documents,
446
  document_metadata,
447
  ) = data_loader.get_chunks(
448
- [
449
- "https://dl4ds.github.io/fa2024/static_files/discussion_slides/00_discussion.pdf"
450
- ],
451
  [],
452
  )
453
 
 
417
 
418
  if __name__ == "__main__":
419
  import yaml
420
+ import argparse
421
+
422
+ parser = argparse.ArgumentParser(description="Process some links.")
423
+ parser.add_argument(
424
+ '--links',
425
+ nargs='+',
426
+ required=True,
427
+ help="List of links to process."
428
+ )
429
+
430
+ args = parser.parse_args()
431
+ links_to_process = args.links
432
 
433
  logger = logging.getLogger(__name__)
434
  logger.setLevel(logging.INFO)
 
457
  documents,
458
  document_metadata,
459
  ) = data_loader.get_chunks(
460
+ links_to_process,
 
 
461
  [],
462
  )
463