{
  "hook_point_in": "blocks.15.hook_mlp_out",
  "hook_point_out": "blocks.15.hook_mlp_out",
  "use_decoder_bias": true,
  "apply_decoder_bias_to_pre_encoder": false,
  "expansion_factor": 8,
  "d_model": 4096,
  "d_sae": 32768,
  "norm_activation": "token-wise",
  "dataset_average_activation_norm": null,
  "decoder_exactly_fixed_norm": false,
  "sparsity_include_decoder_norm": true,
  "use_glu_encoder": false,
  "init_decoder_norm": null,
  "init_encoder_norm": null,
  "init_encoder_with_decoder_transpose": true,
  "l1_coefficient": 1.6e-05,
  "l1_coefficient_warmup_steps": 14648,
  "lp": 1,
  "use_ghost_grads": false,
  "tp_size": 1,
  "ddp_size": 1
}