Update tokenization_iPLM.py
Fix bug when not passing '|' in input seq
- tokenization_iPLM.py +1 -1
tokenization_iPLM.py
CHANGED
@@ -66,7 +66,7 @@ class iPLMTokenizer(PreTrainedTokenizerFast):
|
|
66 |
|
67 |
attn_mask_prefix[i] = True
|
68 |
else:
|
69 |
-
raw_text.append(text)
|
70 |
|
71 |
batch = super().__call__(raw_text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
|
72 |
|
|
|
66 |
|
67 |
attn_mask_prefix[i] = True
|
68 |
else:
|
69 |
+
raw_text.append(text[i])
|
70 |
|
71 |
batch = super().__call__(raw_text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
|
72 |
|