Junde committed on
Commit
b1633e6
1 Parent(s): 5f6652b

Update tokenization_iPLM.py

Browse files

fix bug when not passing '|' in input seq

Files changed (1) hide show
  1. tokenization_iPLM.py +1 -1
tokenization_iPLM.py CHANGED
@@ -66,7 +66,7 @@ class iPLMTokenizer(PreTrainedTokenizerFast):
66
 
67
  attn_mask_prefix[i] = True
68
  else:
69
- raw_text.append(text)
70
 
71
  batch = super().__call__(raw_text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
72
 
 
66
 
67
  attn_mask_prefix[i] = True
68
  else:
69
+ raw_text.append(text[i])
70
 
71
  batch = super().__call__(raw_text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
72