File size: 1,231 Bytes
de00f24
 
 
 
 
 
 
e8edc24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Stage 1: pruning. A single SparseGPTModifier is declared under
# obcq_modifiers.
pruning_stage:
    obcq_modifiers:
        SparseGPTModifier:
            # Target sparsity of 0.5 combined with a "2:4" mask structure.
            # NOTE(review): presumably 2 of every 4 consecutive weights are
            # kept -- confirm against the consuming library's documentation.
            sparsity: 0.5
            sequential_update: true
            # Kept quoted: a bare 2:4 is read as a sexagesimal integer by
            # YAML 1.1 parsers.
            mask_structure: "2:4"
            # Single quotes keep the backslash in the pattern literal.
            # NOTE(review): the 're:' prefix presumably marks the entry as a
            # regular expression for the consumer -- confirm.
            targets:
                - 're:model.layers.\d*$'
# Stage 2: quantization. Declares one QuantizationModifier and, alongside it,
# a ConstantPruningModifier over the attention/MLP projection weights.
quant_stage:
    quant_modifiers:
        QuantizationModifier:
            # NOTE(review): presumably the modules listed here are skipped by
            # the quantizer -- confirm against the consumer's schema.
            ignore:
                - 'lm_head'
            config_groups:
                group_0:
                    # Weights and input activations use the same settings:
                    # 8-bit float, per-tensor strategy, non-dynamic, symmetric.
                    weights:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    input_activations:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    targets:
                        - 'Linear'
    pruning_modifiers:
        ConstantPruningModifier:
            # NOTE(review): presumably this keeps the sparsity pattern from
            # pruning_stage fixed while this stage runs -- confirm.
            # One pattern per projection weight; the dots are unescaped, so
            # each '.' matches any single character.
            targets:
                - 're:.*q_proj.weight'
                - 're:.*k_proj.weight'
                - 're:.*v_proj.weight'
                - 're:.*o_proj.weight'
                - 're:.*gate_proj.weight'
                - 're:.*up_proj.weight'
                - 're:.*down_proj.weight'
            start: 0