{ "best_metric": 0.47333332896232605, "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.17-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 0.00018, "loss": 0.4638, "step": 20, "train/kl": 7.083856105804443, "train/logps/chosen": -426.40190360915494, "train/logps/rejected": -550.5845768960675, "train/rewards/chosen": -14.309082890900088, "train/rewards/margins": 11.978627234626599, "train/rewards/rejected": -26.287710125526687 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 0.00015142857142857143, "loss": 0.4844, "step": 40, "train/kl": 0.0, "train/logps/chosen": -3204.072177419355, "train/logps/rejected": -3086.9876893939395, "train/rewards/chosen": -292.8815524193548, "train/rewards/margins": -13.51173707844572, "train/rewards/rejected": -279.3698153409091 }, { "epoch": 0.34, "eval/kl": 0.0, "eval/logps/chosen": -2537.9452024647885, "eval/logps/rejected": -2313.8192246835442, "eval/rewards/chosen": -225.4286971830986, "eval/rewards/margins": -20.483273429142884, "eval/rewards/rejected": -204.9454237539557, "eval_loss": 0.47333332896232605, "eval_runtime": 143.3034, "eval_samples_per_second": 2.093, "eval_steps_per_second": 0.523, "step": 50 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 0.00012285714285714287, "loss": 0.5, "step": 60, "train/kl": 0.0, "train/logps/chosen": -2750.593359375, "train/logps/rejected": -2645.1216796875, "train/rewards/chosen": -247.13798828125, "train/rewards/margins": -11.170458984374989, "train/rewards/rejected": -235.967529296875 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.428571428571429e-05, "loss": 0.425, "step": 80, "train/kl": 0.0, "train/logps/chosen": -2778.7603400735293, "train/logps/rejected": -2450.7654551630435, "train/rewards/chosen": -248.5206801470588, "train/rewards/margins": -31.70898687260228, "train/rewards/rejected": -216.81169327445653 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 6.571428571428571e-05, "loss": 0.5031, "step": 100, "train/kl": 0.0, "train/logps/chosen": -2629.9400232919256, "train/logps/rejected": -2546.1786556603774, "train/rewards/chosen": -234.98452057453417, "train/rewards/margins": -9.083110393716566, "train/rewards/rejected": -225.9014101808176 }, { "epoch": 0.68, "eval/kl": 0.0, "eval/logps/chosen": -2499.3208626760565, "eval/logps/rejected": -2280.931566455696, "eval/rewards/chosen": -221.56628246038733, "eval/rewards/margins": -19.909649252317706, "eval/rewards/rejected": -201.65663320806962, "eval_loss": 0.47333332896232605, "eval_runtime": 143.2853, "eval_samples_per_second": 2.094, "eval_steps_per_second": 0.523, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }