Zintoulou committed on
Commit
358c34a
·
verified ·
1 Parent(s): ed4e5f9

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +25 -64
tokenizer_config.json CHANGED
@@ -1,83 +1,44 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "added_tokens_decoder": {
5
- "0": {
6
- "content": "<unk>",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "1": {
14
- "content": "<s>",
15
- "lstrip": false,
16
- "normalized": true,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "2": {
22
- "content": "</s>",
23
- "lstrip": false,
24
- "normalized": true,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "32007": {
30
- "content": "▁<PRE>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "32008": {
38
- "content": "▁<SUF>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "32009": {
46
- "content": "▁<MID>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "32010": {
54
- "content": "▁<EOT>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- }
61
- },
62
  "additional_special_tokens": [
63
  "▁<PRE>",
64
  "▁<MID>",
65
  "▁<SUF>",
66
  "▁<EOT>"
67
  ],
68
- "bos_token": "<s>",
 
 
 
 
 
 
 
69
  "clean_up_tokenization_spaces": false,
70
- "eos_token": "</s>",
 
 
 
 
 
 
 
71
  "eot_token": "▁<EOT>",
72
  "fill_token": "<FILL_ME>",
73
  "legacy": null,
74
  "middle_token": "▁<MID>",
75
  "model_max_length": 1000000000000000019884624838656,
76
- "pad_token": "</s>",
77
  "prefix_token": "▁<PRE>",
78
  "sp_model_kwargs": {},
79
  "suffix_token": "▁<SUF>",
80
  "tokenizer_class": "CodeLlamaTokenizer",
81
- "unk_token": "<unk>",
 
 
 
 
 
 
 
82
  "use_default_system_prompt": false
83
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "additional_special_tokens": [
3
  "▁<PRE>",
4
  "▁<MID>",
5
  "▁<SUF>",
6
  "▁<EOT>"
7
  ],
8
+ "bos_token": {
9
+ "__type": "AddedToken",
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
  "clean_up_tokenization_spaces": false,
17
+ "eos_token": {
18
+ "__type": "AddedToken",
19
+ "content": "</s>",
20
+ "lstrip": false,
21
+ "normalized": true,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
  "eot_token": "▁<EOT>",
26
  "fill_token": "<FILL_ME>",
27
  "legacy": null,
28
  "middle_token": "▁<MID>",
29
  "model_max_length": 1000000000000000019884624838656,
30
+ "pad_token": null,
31
  "prefix_token": "▁<PRE>",
32
  "sp_model_kwargs": {},
33
  "suffix_token": "▁<SUF>",
34
  "tokenizer_class": "CodeLlamaTokenizer",
35
+ "unk_token": {
36
+ "__type": "AddedToken",
37
+ "content": "<unk>",
38
+ "lstrip": false,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false
42
+ },
43
  "use_default_system_prompt": false
44
  }