zephyr-7b-sft-full-orpo / all_results.json
statking's picture
End of training
590cc04 verified
raw
history blame
925 Bytes
{
"epoch": 2.9984268484530676,
"eval_log_odds_chosen": 1.220078706741333,
"eval_log_odds_ratio": -1.047989010810852,
"eval_logits/chosen": -2.849764585494995,
"eval_logits/rejected": -2.8669533729553223,
"eval_logps/chosen": -2.7812891006469727,
"eval_logps/rejected": -3.8601787090301514,
"eval_loss": 1.3770909309387207,
"eval_nll_loss": 1.3531930446624756,
"eval_rewards/accuracies": 0.6527777910232544,
"eval_rewards/chosen": -0.13906444609165192,
"eval_rewards/margins": 0.05394447594881058,
"eval_rewards/rejected": -0.1930089294910431,
"eval_runtime": 135.9849,
"eval_samples": 1994,
"eval_samples_per_second": 14.663,
"eval_steps_per_second": 0.463,
"total_flos": 0.0,
"train_loss": 0.32389816019492534,
"train_runtime": 62235.4926,
"train_samples": 61005,
"train_samples_per_second": 2.941,
"train_steps_per_second": 0.046
}