File size: 1,012 Bytes
301655d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
{
"device": "cuda:0",
"seed": 42,
"dtype": "torch.bfloat16",
"hook_point_in": "blocks.2.hook_attn_out",
"hook_point_out": "blocks.2.hook_attn_out",
"use_decoder_bias": true,
"apply_decoder_bias_to_pre_encoder": false,
"expansion_factor": 8,
"d_model": 4096,
"d_sae": 32768,
"bias_init_method": "all_zero",
"act_fn": "jumprelu",
"jump_relu_threshold": 0.0247802734375,
"norm_activation": "dataset-wise",
"dataset_average_activation_norm": {
"in": 0.87890625,
"out": 0.87890625
},
"decoder_exactly_fixed_norm": false,
"sparsity_include_decoder_norm": true,
"use_glu_encoder": false,
"init_decoder_norm": 0.5,
"init_encoder_norm": null,
"init_encoder_with_decoder_transpose": true,
"lp": 1,
"l1_coefficient": 8e-05,
"l1_coefficient_warmup_steps": 39062,
"top_k": 50,
"k_warmup_steps": 39062,
"use_batch_norm_mse": true,
"use_ghost_grads": false,
"tp_size": 1,
"ddp_size": 1
}