Files changed (1)
  1. README.md +4 -4
README.md CHANGED
@@ -16,11 +16,11 @@ class JiebaTokenizer(BertTokenizer):
         self.pre_tokenizer = pre_tokenizer
     def _tokenize(self, text, *arg, **kwargs):
         split_tokens = []
-        for text in self.pre_tokenizer(text):
-            if text in self.vocab:
-                split_tokens.append(text)
+        for word in self.pre_tokenizer(text):
+            if word in self.vocab:
+                split_tokens.append(word)
             else:
-                split_tokens.extend(super()._tokenize(text))
+                split_tokens.extend(super()._tokenize(word))
         return split_tokens
 model = BigBirdModel.from_pretrained('Lowin/chinese-bigbird-base-4096')
 tokenizer = JiebaTokenizer.from_pretrained('Lowin/chinese-bigbird-base-4096')
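The patch renames the `_tokenize` loop variable from `text` to `word` so it no longer shadows the method's `text` parameter. Behavior is unchanged, since the pre-tokenizer is called once before the loop begins, but the intent now reads clearly. For context, here is a minimal, self-contained sketch of the patched snippet; the imports and the jieba-based `pre_tokenizer` default are assumptions for illustration, not part of this diff:

```python
# Minimal sketch of the patched snippet. The imports and the jieba
# default for pre_tokenizer are assumptions; only _tokenize's body
# comes from this diff.
import jieba
from transformers import BertTokenizer, BigBirdModel

class JiebaTokenizer(BertTokenizer):
    def __init__(self, pre_tokenizer=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Assumed default: segment Chinese text into words with jieba.
        self.pre_tokenizer = pre_tokenizer or (lambda x: jieba.cut(x, HMM=False))

    def _tokenize(self, text, *arg, **kwargs):
        split_tokens = []
        # `word` no longer shadows the `text` parameter of _tokenize.
        for word in self.pre_tokenizer(text):
            if word in self.vocab:
                split_tokens.append(word)  # whole word is in the vocab
            else:
                # Fall back to BERT's WordPiece for out-of-vocab words.
                split_tokens.extend(super()._tokenize(word))
        return split_tokens

model = BigBirdModel.from_pretrained('Lowin/chinese-bigbird-base-4096')
tokenizer = JiebaTokenizer.from_pretrained('Lowin/chinese-bigbird-base-4096')
```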