stan-hua committed
Commit 61899f9
1 Parent(s): af891fe

Push folder to HuggingFace Hub

Files changed (4)
  1. config.json +43 -1
  2. recipe.yaml +7 -0
  3. special_tokens_map.json +2 -1
  4. tokenizer_config.json +1 -0
config.json CHANGED
@@ -23,6 +23,48 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": null,
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "int"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "int"
+        }
+      }
+    },
+    "format": "int-quantized",
+    "global_compression_ratio": 1.458959021545191,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed"
+  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
@@ -37,4 +79,4 @@
   "transformers_version": "4.45.2",
   "use_cache": true,
   "vocab_size": 128256
-}
+}
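The added quantization_config block describes a compressed-tensors W8A8 checkpoint: 8-bit symmetric per-channel weights and 8-bit dynamic per-token activations on every Linear layer, with lm_head left unquantized. A minimal loading sketch, not part of this commit: it assumes a vLLM build with compressed-tensors support, and the repo id is only a placeholder for wherever this folder was pushed.

from vllm import LLM, SamplingParams

# Placeholder repo id; substitute the actual Hub repository for this folder.
llm = LLM(model="stan-hua/<this-repo>")
params = SamplingParams(temperature=0.0, max_tokens=64)
out = llm.generate(["Summarize SmoothQuant in one sentence."], params)
print(out[0].outputs[0].text)

vLLM reads the quantization_config above to pick the compressed-tensors kernels, so no extra flags are needed beyond the model id.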
recipe.yaml ADDED
@@ -0,0 +1,7 @@
+DEFAULT_stage:
+  DEFAULT_modifiers:
+    SmoothQuantModifier: {smoothing_strength: 0.8}
+    QuantizationModifier:
+      ignore: [lm_head]
+      targets: Linear
+      scheme: W8A8
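recipe.yaml is an llm-compressor recipe: SmoothQuant with smoothing_strength 0.8, followed by one-shot W8A8 quantization of Linear layers, skipping lm_head. A rough sketch of how such a recipe could be applied with llm-compressor's oneshot entrypoint; the base model id, calibration dataset, output directory, and sample counts below are assumptions for illustration, not taken from this commit, and the exact oneshot signature varies across llm-compressor versions.

from llmcompressor.transformers import oneshot

oneshot(
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed base model, illustration only
    dataset="open_platypus",                   # assumed registered calibration dataset
    recipe="recipe.yaml",                      # the recipe added in this commit
    output_dir="llama-w8a8-compressed",        # hypothetical output folder
    max_seq_length=2048,
    num_calibration_samples=512,
)

The calibration pass is what produces the minmax per-channel weight scales recorded in config.json; activation scales are computed dynamically per token at inference time.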
special_tokens_map.json CHANGED
@@ -12,5 +12,6 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
-  }
+  },
+  "pad_token": "<|eot_id|>"
 }
tokenizer_config.json CHANGED
@@ -2058,5 +2058,6 @@
     "attention_mask"
   ],
   "model_max_length": 131072,
+  "pad_token": "<|eot_id|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
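Both tokenizer files now set the pad token to <|eot_id|>, so padded batches work without defining a separate pad token. A quick check, again with a placeholder repo id:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("stan-hua/<this-repo>")  # placeholder repo id
print(tok.pad_token)  # expected: <|eot_id|>
batch = tok(["short", "a longer prompt that forces padding"], padding=True, return_tensors="pt")
print(batch["attention_mask"])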