Commit
•
075bd53
1
Parent(s):
aa25cd2
Fix typo (#301)
Browse files
- Fix typo (d1bbf272387475f1c413cca8679150549abd8bed)
Co-authored-by: Han Chen <hchen725@users.noreply.huggingface.co>
- geneformer/tokenizer.py +1 -1
geneformer/tokenizer.py
CHANGED
@@ -366,7 +366,7 @@ class TranscriptomeTokenizer:
|
|
366 |
example["length_uncropped"] = len(example["input_ids"])
|
367 |
|
368 |
# Truncate/Crop input_ids to input size
|
369 |
- if
|
370 |
example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
|
371 |
example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
|
372 |
example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))
|
|
|
366 |
example["length_uncropped"] = len(example["input_ids"])
|
367 |
|
368 |
# Truncate/Crop input_ids to input size
|
369 |
+ if self.special_token:
|
370 |
example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
|
371 |
example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
|
372 |
example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))
|