zephyr-7b-dpo-qlora / eval_results.json
Flowersea37's picture
End of training
152a978 verified
{
"epoch": 0.9999018549416037,
"eval_logits/chosen": -1.4624338150024414,
"eval_logits/rejected": -1.4133074283599854,
"eval_logps/chosen": -460.1110534667969,
"eval_logps/rejected": -483.5314025878906,
"eval_loss": 0.5487077236175537,
"eval_rewards/accuracies": 0.7380239367485046,
"eval_rewards/chosen": -1.126965880393982,
"eval_rewards/margins": 0.6619576215744019,
"eval_rewards/rejected": -1.7889233827590942,
"eval_runtime": 296.9255,
"eval_samples": 2000,
"eval_samples_per_second": 6.736,
"eval_steps_per_second": 1.125
}