"""

"""

import tiktoken
import tokenizer.tiktoken_patch  # imported for its side effects (patches tiktoken)

tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')
tokenizer.vocab_size = tokenizer.n_vocab  # alias n_vocab as vocab_size

tokenizer.comments = "tiktoken is a fast BPE tokeniser for use with OpenAI's models. There are 16 tokens that raise KeyError."
tokenizer.reversible = True  # It's reversible and lossless, so you can convert tokens back into the original text
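
# Minimal usage sketch (not part of the original module): it relies only on
# the `tokenizer` object defined above and tiktoken's encode/decode API to
# demonstrate the lossless round-trip noted in the comment above.
if __name__ == "__main__":
    sample = "tiktoken splits text into BPE tokens."
    token_ids = tokenizer.encode(sample)
    assert tokenizer.decode(token_ids) == sample  # round-trip is lossless
    print(f"{len(token_ids)} tokens; vocab size {tokenizer.vocab_size}")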