|
{ |
|
"best_metric": 0.544333279132843, |
|
"best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.9-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100", |
|
"epoch": 0.684931506849315, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0, |
|
"kl": 0.9986292123794556, |
|
"learning_rate": 0.00018, |
|
"logps/chosen": -876.9887084960938, |
|
"logps/rejected": -891.7728271484375, |
|
"loss": 0.5066, |
|
"rewards/chosen": -59.218875885009766, |
|
"rewards/margins": 1.2524851560592651, |
|
"rewards/rejected": -60.5962028503418, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015142857142857143, |
|
"logps/chosen": -1901.8116455078125, |
|
"logps/rejected": -1940.32470703125, |
|
"loss": 0.5606, |
|
"rewards/chosen": -161.08740234375, |
|
"rewards/margins": 2.580688953399658, |
|
"rewards/rejected": -164.8165283203125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -1797.90625, |
|
"eval_logps/rejected": -1778.623291015625, |
|
"eval_loss": 0.5443333983421326, |
|
"eval_rewards/chosen": -151.05047607421875, |
|
"eval_rewards/margins": 2.7896132469177246, |
|
"eval_rewards/rejected": -151.03567504882812, |
|
"eval_runtime": 138.2181, |
|
"eval_samples_per_second": 2.17, |
|
"eval_steps_per_second": 0.543, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012285714285714287, |
|
"logps/chosen": -1852.9610595703125, |
|
"logps/rejected": -1893.4521484375, |
|
"loss": 0.5211, |
|
"rewards/chosen": -157.99769592285156, |
|
"rewards/margins": 3.8418266773223877, |
|
"rewards/rejected": -160.36117553710938, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 9.428571428571429e-05, |
|
"logps/chosen": -2051.344482421875, |
|
"logps/rejected": -1892.0853271484375, |
|
"loss": 0.5714, |
|
"rewards/chosen": -176.026611328125, |
|
"rewards/margins": -20.468326568603516, |
|
"rewards/rejected": -161.7165069580078, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 6.571428571428571e-05, |
|
"logps/chosen": -2035.6630859375, |
|
"logps/rejected": -1939.001220703125, |
|
"loss": 0.4995, |
|
"rewards/chosen": -175.16189575195312, |
|
"rewards/margins": -12.196085929870605, |
|
"rewards/rejected": -165.23004150390625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -1859.810791015625, |
|
"eval_logps/rejected": -1831.84814453125, |
|
"eval_loss": 0.544333279132843, |
|
"eval_rewards/chosen": -157.24095153808594, |
|
"eval_rewards/margins": 2.0474841594696045, |
|
"eval_rewards/rejected": -156.35816955566406, |
|
"eval_runtime": 138.0332, |
|
"eval_samples_per_second": 2.173, |
|
"eval_steps_per_second": 0.543, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|