stan-hua committed
Commit 1300f68
1 Parent(s): 51ad104

Push folder to HuggingFace Hub

Files changed (2)
  1. config.json +32 -1
  2. recipe.yaml +7 -0
config.json CHANGED
@@ -23,6 +23,37 @@
   "num_hidden_layers": 80,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": null,
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "int"
+        }
+      }
+    },
+    "format": "pack-quantized",
+    "global_compression_ratio": 1.463543865167781,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed"
+  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
@@ -37,4 +68,4 @@
   "transformers_version": "4.45.2",
   "use_cache": true,
   "vocab_size": 128256
-}
+}
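
The new quantization_config marks this checkpoint as an int8, per-channel, weight-only model stored in compressed-tensors' pack-quantized format, with lm_head left unquantized. A minimal loading sketch, assuming the checkpoint is consumed with transformers plus the compressed-tensors package; the repo id below is a hypothetical placeholder, not taken from this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "stan-hua/your-model"  # hypothetical placeholder; substitute the actual repo

# quant_method "compressed-tensors" with quantization_status "compressed"
# tells transformers to unpack the pack-quantized int8 channel-wise weights
# at load time (requires `pip install compressed-tensors`).
model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(repo_id)
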
recipe.yaml ADDED
@@ -0,0 +1,7 @@
+DEFAULT_stage:
+  DEFAULT_modifiers:
+    SmoothQuantModifier: {smoothing_strength: 0.8}
+    QuantizationModifier:
+      ignore: [lm_head]
+      targets: Linear
+      scheme: W8A16
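
The recipe pairs SmoothQuant (smoothing_strength 0.8) with one-shot W8A16 quantization of every Linear layer except lm_head, which matches the quantization_config written into config.json above. A hedged sketch of how such a recipe is typically applied with llm-compressor's oneshot flow; the base model and calibration settings below are assumptions for illustration, not recorded in this commit:

from llmcompressor.transformers import oneshot

oneshot(
    model="meta-llama/Llama-3.1-70B-Instruct",  # assumed base model; the config (80 layers, 128256 vocab) is consistent with Llama 3.1 70B
    dataset="open_platypus",                    # example calibration dataset; SmoothQuant needs calibration data
    recipe="recipe.yaml",                       # the recipe added in this commit
    max_seq_length=2048,                        # illustrative calibration settings
    num_calibration_samples=512,
)

After the run, saving the model with save_pretrained produces a compressed checkpoint whose config.json carries a quantization_config block like the one in this commit.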