Fixed special tokens map to not ignore unk
BertForMorphTagging.py CHANGED (+1 -1)
@@ -157,7 +157,7 @@ class BertForMorphTagging(BertPreTrainedModel):
         # for each sentence, return a dict object with the following fields { text, tokens }
         # Where tokens is a list of dicts, where each dict is:
         # { pos: str, feats: dict, prefixes: List[str], suffix: str | bool, suffix_feats: dict | None}
-        special_tokens = set(tokenizer.special_tokens_map.values())
+        special_tokens = set([tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token])
         ret = []
         for sent_idx,sentence in enumerate(sentences):
             input_id_strs = tokenizer.convert_ids_to_tokens(inputs['input_ids'][sent_idx])
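Why this matters: per the commit title, the old filter was built from the tokenizer's special tokens map, which also covers unk_token, so [UNK] pieces were filtered out of the per-sentence output along with padding and sentence markers. A minimal sketch of the difference, assuming any standard BERT tokenizer (bert-base-uncased is used here purely for illustration, not the model's own tokenizer):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    # Old filter: special_tokens_map includes unk_token, so [UNK]
    # pieces were dropped as if they were structural markers.
    old_special = set(tokenizer.special_tokens_map.values())
    print(tokenizer.unk_token in old_special)  # True  -> [UNK] was ignored

    # New filter: only pad/cls/sep are excluded, so an out-of-vocabulary
    # word still produces a token entry in the returned dict.
    new_special = {tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token}
    print(tokenizer.unk_token in new_special)  # False -> [UNK] is kept

Listing the structural tokens explicitly, rather than subtracting unk_token from the map, also keeps the intent of the filter obvious at the call site.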