# Web-page residue captured together with the file; not part of the recipe.
# mgoin's picture
# Create recipe.yaml
# fed89b0
# raw
# history blame contribute delete
# No virus
# 935 Bytes
# Compression recipe: one stage (`test_stage`) holding one modifier group
# (`obcq_modifiers`) with two modifiers, a quantization modifier and a
# SparseGPT modifier.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost; confirm it against the consuming tool's schema.
test_stage:
  obcq_modifiers:
    QuantizationModifier:
      # Module classes and layer names that are excluded from quantization.
      ignore:
        # These operations don't make sense to quantize
        - MistralRotaryEmbedding
        - MistralRMSNorm
        - SiLUActivation
        # Skip quantizing the BMMs
        # (the entry below is commented out, so it is currently NOT ignored)
        # - QuantizableMatMul
        # Skip quantizing the layers with the most sensitive activations
        - model.layers.1.mlp.down_proj
        - model.layers.31.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.30.mlp.gate_proj
        - model.layers.30.mlp.up_proj
      post_oneshot_calibration: true
      # Per-module-type overrides of the quantization scheme.
      scheme_overrides:
        Embedding:
          # Explicit null; presumably leaves embedding inputs unquantized —
          # TODO confirm against the modifier's documentation.
          input_activations: null
          # 8-bit, non-symmetric weights for Embedding modules.
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      # presumably the target fraction of pruned weights (0.5 = 50%) — confirm
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      # Quoted on purpose: an unquoted 0:0 can be read as a base-60 integer
      # by YAML 1.1 parsers instead of a string.
      mask_structure: "0:0"
      # Double-quoted, so `\\d` reaches the consumer as `\d`. The `re:` prefix
      # presumably marks the entry as a regular expression — confirm.
      targets: ["re:model.layers.\\d*$"]