hugodk-sch's picture
End of training
83a41b6
raw
history blame
570 Bytes
{
"epoch": 4.0,
"eval_logits/chosen": -2.8337740898132324,
"eval_logits/rejected": -2.830592393875122,
"eval_logps/chosen": -31.703718185424805,
"eval_logps/rejected": -35.44328689575195,
"eval_loss": 0.4786534011363983,
"eval_rewards/accuracies": 0.560215950012207,
"eval_rewards/chosen": -0.2527596652507782,
"eval_rewards/margins": 0.1943179965019226,
"eval_rewards/rejected": -0.4470776319503784,
"eval_runtime": 112.8079,
"eval_samples": 343,
"eval_samples_per_second": 3.041,
"eval_steps_per_second": 0.381
}