Update tokenization_iPLM.py
Fix bug when not passing '|' in input seq
- tokenization_iPLM.py +1 -1
tokenization_iPLM.py
CHANGED
@@ -66,7 +66,7 @@ class iPLMTokenizer(PreTrainedTokenizerFast):
|
|
66 |
|
67 |
attn_mask_prefix[i] = True
|
68 |
else:
|
69 |
-
raw_text.append(text)
|
70 |
|
71 |
batch = super().__call__(raw_text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
|
72 |
|
|
|
66 |
|
67 |
attn_mask_prefix[i] = True
|
68 |
else:
|
69 |
+
raw_text.append(text[i])
|
70 |
|
71 |
batch = super().__call__(raw_text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
|
72 |
|