Abhaykoul commited on
Commit
69db776
1 Parent(s): 10712fb

Upload 3 files

Browse files
special_tokens_map.json CHANGED
@@ -1,6 +1,34 @@
1
  {
2
- "bos_token": "<|im_start|>",
3
- "eos_token": "<|im_end|>",
4
- "pad_token": "<|endoftext|>",
5
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|im_start|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|im_end|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "unk_token": {
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -138,12 +138,17 @@
138
  "special": true
139
  }
140
  },
 
 
 
 
141
  "bos_token": "<|im_start|>",
142
- "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
143
  "clean_up_tokenization_spaces": false,
144
  "eos_token": "<|im_end|>",
145
- "model_max_length": 1000000000000000019884624838656,
146
- "pad_token": "<|endoftext|>",
147
  "tokenizer_class": "GPT2Tokenizer",
148
- "unk_token": "<|endoftext|>"
 
149
  }
 
138
  "special": true
139
  }
140
  },
141
+ "additional_special_tokens": [
142
+ "<|im_start|>",
143
+ "<|im_end|>"
144
+ ],
145
  "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
+ "model_max_length": 2048,
150
+ "pad_token": "<|im_end|>",
151
  "tokenizer_class": "GPT2Tokenizer",
152
+ "unk_token": "<|endoftext|>",
153
+ "vocab_size": 49152
154
  }