{"num_tokens": 100000, "batch_size": 32, "act_name": "hook_norm", "dict_size": 1536, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.999, "dict_mult": 32, "seq_len": 128, "remove_rare_dir": false, "device": "cuda:0", "enc_dtype": "fp32", "seed": 16, "act_size": 768, "model_batch_size": 32, "num_epochs": 5, "lr": 0.001, "l1_weight": 1e-05, "l2_weight": 1e-05, "log_every": 50, "eval_every": 100, "recons_every": 500, "save_every": 500, "reset_freq_threshold": 3.162277660168379e-06, "wandb_project": "mamba_autoencoder", "wandb_name": null, "encoder_hidden_sizes": [512, 256], "decoder_hidden_sizes": [256, 512], "latent_dim": 64}