{"mse_mag_multiply": true, "loss_balancer_beta": 0.7520229775744602, "loss_balancer_r": 0.9706519501751338, "tf_pma_low_exp_2": 6, "grad_loss_fn": "mae", "pma_ffn_mode": "shared", "patience": 10, "inds_init_mode": "fixnorm", "grad_clip": 0.6896836352825375, "head_final_mul": "identity", "gradient_penalty_mode": "NONE", "dataset_size_exp_2": 11, "batch_size_exp_2": 2, "epochs": 100, "lr_mul": 0.08030439779404704, "n_warmup_steps": 85, "Optim": "diffgrad", "fixed_role_model": "tab_ddpm_concat", "mse_mag_target": 0.13044551835398707, "d_model_exp_2": 8, "attn_activation": "sigmoid", "tf_d_inner_exp_2": 8, "tf_n_layers_enc": 5, "tf_n_head_exp_2": 7, "tf_activation": "leakyhardsigmoid", "tf_activation_final": "leakyhardtanh", "tf_num_inds_exp_2": 6, "ada_d_hid_exp_2": 8, "ada_n_layers": 8, "ada_activation": "relu6", "ada_activation_final": "leakyhardtanh", "head_d_hid_exp_2": 8, "head_n_layers": 8, "head_n_head_exp_2": 5, "head_activation": "relu6", "head_activation_final": "softsign", "mse_mag": false, "single_model": true, "models": ["tab_ddpm_concat"], "max_seconds": 3600}