Commit
•
ebc1e09
1
Parent(s):
664f71e
Update geneformer/emb_extractor.py (#453)
Browse files
- Update geneformer/emb_extractor.py (2c8d3f5d8ebb362ad102cb7e924d84a39b7349c8)
- Update geneformer/emb_extractor.py (62074538699215f6f0f8aca01d2e8f974386d800)
Co-authored-by: Han Chen <hchen725@users.noreply.huggingface.co>
geneformer/emb_extractor.py
CHANGED
@@ -596,6 +596,12 @@ class EmbExtractor:
|
|
596 |
filtered_input_data = pu.load_and_filter(
|
597 |
self.filter_data, self.nproc, input_data_file
|
598 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
599 |
if cell_state is not None:
|
600 |
filtered_input_data = pu.filter_by_dict(
|
601 |
filtered_input_data, cell_state, self.nproc
|
|
|
596 |
filtered_input_data = pu.load_and_filter(
|
597 |
self.filter_data, self.nproc, input_data_file
|
598 |
)
|
599 |
+
|
600 |
+
# Check to make sure that all the labels exist in the tokenized data:
|
601 |
+
if self.emb_label is not None:
|
602 |
+
for label in self.emb_label:
|
603 |
+
assert label in filtered_input_data.features.keys(), f"Attribute `{label}` not present in dataset features"
|
604 |
+
|
605 |
if cell_state is not None:
|
606 |
filtered_input_data = pu.filter_by_dict(
|
607 |
filtered_input_data, cell_state, self.nproc
|