hugodk-sch's picture
End of training
5fa8621
raw
history blame
572 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -2.8198559284210205,
"eval_logits/rejected": -2.817356824874878,
"eval_logps/chosen": -31.425559997558594,
"eval_logps/rejected": -35.012386322021484,
"eval_loss": 0.6725602746009827,
"eval_rewards/accuracies": 0.5627076625823975,
"eval_rewards/chosen": -0.08586728572845459,
"eval_rewards/margins": 0.1026720479130745,
"eval_rewards/rejected": -0.1885393261909485,
"eval_runtime": 112.8393,
"eval_samples": 343,
"eval_samples_per_second": 3.04,
"eval_steps_per_second": 0.381
}