ap-normistral-7b-align-scan / all_results.json
hugodk-sch's picture
End of training
3c889b7 verified
raw
history blame
738 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": 98.69615173339844,
"eval_logits/rejected": 98.67865753173828,
"eval_logps/chosen": -32.57830810546875,
"eval_logps/rejected": -36.26156234741211,
"eval_loss": 0.9490477442741394,
"eval_rewards/accuracies": 0.5689368844032288,
"eval_rewards/chosen": -0.06756443530321121,
"eval_rewards/margins": 0.07995220273733139,
"eval_rewards/rejected": -0.1475166231393814,
"eval_runtime": 103.72,
"eval_samples": 343,
"eval_samples_per_second": 3.307,
"eval_steps_per_second": 0.415,
"train_loss": 0.735349414874981,
"train_runtime": 2550.735,
"train_samples": 3079,
"train_samples_per_second": 1.207,
"train_steps_per_second": 0.151
}