{"Body": "twin_encoder", "loss_balancer_meta": true, "loss_balancer_log": false, "loss_balancer_lbtw": false, "pma_skip_small": false, "isab_skip_small": false, "layer_norm": false, "pma_layer_norm": false, "attn_residual": true, "tf_n_layers_dec": false, "tf_isab_rank": 0, "tf_lora": false, "tf_layer_norm": false, "tf_pma_start": -1, "ada_n_seeds": 0, "head_n_seeds": 0, "tf_pma_low": 16, "gradient_penalty_kwargs": {"mag_loss": true, "mse_mag": true, "mag_corr": false, "seq_mag": false, "cos_loss": false, "mse_mag_kwargs": {"target": 1.0, "multiply": true}, "mag_corr_kwargs": {"only_sign": false}, "cos_loss_kwargs": {"only_sign": true, "cos_matrix": false}}, "dropout": 0, "combine_mode": "diff_left", "tf_isab_mode": "separate", "grad_loss_fn": "mae", "single_model": true, "bias": true, "bias_final": true, "pma_ffn_mode": "shared", "patience": 10, "inds_init_mode": "torch", "grad_clip": 0.8, "gradient_penalty_mode": "NONE", "synth_data": 2, "dataset_size": 2048, "batch_size": 4, "epochs": 100, "lr_mul": 0.04, "n_warmup_steps": 220, "Optim": "diffgrad", "loss_balancer_beta": 0.73, "loss_balancer_r": 0.94, "fixed_role_model": "tvae", "mse_mag": false, "mse_mag_target": 0.2, "mse_mag_multiply": false, "d_model": 512, "attn_activation": "leakyhardsigmoid", "tf_d_inner": 512, "tf_n_layers_enc": 4, "tf_n_head": 64, "tf_activation": "leakyhardtanh", "tf_activation_final": "leakyhardtanh", "tf_num_inds": 64, "ada_d_hid": 1024, "ada_n_layers": 7, "ada_activation": "selu", "ada_activation_final": "leakyhardsigmoid", "head_d_hid": 128, "head_n_layers": 8, "head_n_head": 64, "head_activation": "leakyhardsigmoid", "head_activation_final": "leakyhardsigmoid", "models": ["tvae"], "max_seconds": 3600}