mgoin committed on
Commit
e1baf6d
1 Parent(s): 8e637f1

Updated compression_config to quantization_config

Browse files
Files changed (1) hide show
  1. config.json +43 -43
config.json CHANGED
@@ -12,48 +12,6 @@
12
  },
13
  "aux_loss_alpha": 0.001,
14
  "bos_token_id": 100000,
15
- "compression_config": {
16
- "config_groups": {
17
- "group_0": {
18
- "input_activations": {
19
- "actorder": null,
20
- "block_structure": null,
21
- "dynamic": true,
22
- "group_size": null,
23
- "num_bits": 8,
24
- "observer": "memoryless",
25
- "observer_kwargs": {},
26
- "strategy": "token",
27
- "symmetric": true,
28
- "type": "int"
29
- },
30
- "output_activations": null,
31
- "targets": [
32
- "Linear"
33
- ],
34
- "weights": {
35
- "actorder": null,
36
- "block_structure": null,
37
- "dynamic": false,
38
- "group_size": null,
39
- "num_bits": 8,
40
- "observer": "minmax",
41
- "observer_kwargs": {},
42
- "strategy": "channel",
43
- "symmetric": true,
44
- "type": "int"
45
- }
46
- }
47
- },
48
- "format": "int-quantized",
49
- "global_compression_ratio": 1.2697642717533653,
50
- "ignore": [
51
- "lm_head"
52
- ],
53
- "kv_cache_scheme": null,
54
- "quant_method": "compressed-tensors",
55
- "quantization_status": "compressed"
56
- },
57
  "eos_token_id": 100001,
58
  "ep_size": 1,
59
  "first_k_dense_replace": 1,
@@ -99,5 +57,47 @@
99
  "transformers_version": "4.44.2",
100
  "use_cache": true,
101
  "v_head_dim": 128,
102
- "vocab_size": 102400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
 
12
  },
13
  "aux_loss_alpha": 0.001,
14
  "bos_token_id": 100000,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "eos_token_id": 100001,
16
  "ep_size": 1,
17
  "first_k_dense_replace": 1,
 
57
  "transformers_version": "4.44.2",
58
  "use_cache": true,
59
  "v_head_dim": 128,
60
+ "vocab_size": 102400,
61
+ "quantization_config": {
62
+ "config_groups": {
63
+ "group_0": {
64
+ "input_activations": {
65
+ "actorder": null,
66
+ "block_structure": null,
67
+ "dynamic": true,
68
+ "group_size": null,
69
+ "num_bits": 8,
70
+ "observer": "memoryless",
71
+ "observer_kwargs": {},
72
+ "strategy": "token",
73
+ "symmetric": true,
74
+ "type": "int"
75
+ },
76
+ "output_activations": null,
77
+ "targets": [
78
+ "Linear"
79
+ ],
80
+ "weights": {
81
+ "actorder": null,
82
+ "block_structure": null,
83
+ "dynamic": false,
84
+ "group_size": null,
85
+ "num_bits": 8,
86
+ "observer": "minmax",
87
+ "observer_kwargs": {},
88
+ "strategy": "channel",
89
+ "symmetric": true,
90
+ "type": "int"
91
+ }
92
+ }
93
+ },
94
+ "format": "int-quantized",
95
+ "global_compression_ratio": 1.2697642717533653,
96
+ "ignore": [
97
+ "lm_head"
98
+ ],
99
+ "kv_cache_scheme": null,
100
+ "quant_method": "compressed-tensors",
101
+ "quantization_status": "compressed"
102
+ }
103
  }