ctheodoris hchen725 committed on
Commit
ebc1e09
1 Parent(s): 664f71e

Update geneformer/emb_extractor.py (#453)

Browse files

- Update geneformer/emb_extractor.py (2c8d3f5d8ebb362ad102cb7e924d84a39b7349c8)
- Update geneformer/emb_extractor.py (62074538699215f6f0f8aca01d2e8f974386d800)


Co-authored-by: Han Chen <hchen725@users.noreply.huggingface.co>

Files changed (1) hide show
  1. geneformer/emb_extractor.py +6 -0
geneformer/emb_extractor.py CHANGED
@@ -596,6 +596,12 @@ class EmbExtractor:
596
  filtered_input_data = pu.load_and_filter(
597
  self.filter_data, self.nproc, input_data_file
598
  )
 
 
 
 
 
 
599
  if cell_state is not None:
600
  filtered_input_data = pu.filter_by_dict(
601
  filtered_input_data, cell_state, self.nproc
 
596
  filtered_input_data = pu.load_and_filter(
597
  self.filter_data, self.nproc, input_data_file
598
  )
599
+
600
+ # Check to make sure that all the labels exist in the tokenized data:
601
+ if self.emb_label is not None:
602
+ for label in self.emb_label:
603
+ assert label in filtered_input_data.features.keys(), f"Attribute `{label}` not present in dataset features"
604
+
605
  if cell_state is not None:
606
  filtered_input_data = pu.filter_by_dict(
607
  filtered_input_data, cell_state, self.nproc