Added truncation for long sequences
Browse files- BertForPrefixMarking.py +1 -1
BertForPrefixMarking.py
CHANGED
@@ -159,7 +159,7 @@ class BertForPrefixMarking(BertPreTrainedModel):
|
|
159 |
|
160 |
|
161 |
def encode_sentences_for_bert_for_prefix_marking(tokenizer: BertTokenizerFast, sentences: List[str], padding='longest'):
|
162 |
-
inputs = tokenizer(sentences, padding=padding, return_tensors='pt')
|
163 |
|
164 |
# create our prefix_id_options array which will be like the input ids shape but with an additional
|
165 |
# dimension containing for each prefix whether it can be for that word
|
|
|
159 |
|
160 |
|
161 |
def encode_sentences_for_bert_for_prefix_marking(tokenizer: BertTokenizerFast, sentences: List[str], padding='longest'):
|
162 |
+
inputs = tokenizer(sentences, padding=padding, truncation=True, return_tensors='pt')
|
163 |
|
164 |
# create our prefix_id_options array which will be like the input ids shape but with an additional
|
165 |
# dimension containing for each prefix whether it can be for that word
|