{ "epoch": 2.97, "eval_logits/chosen": -2.226133108139038, "eval_logits/rejected": -2.1600077152252197, "eval_logps/chosen": -264.3455505371094, "eval_logps/rejected": -222.62977600097656, "eval_loss": 0.6542457342147827, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": 0.06409955769777298, "eval_rewards/margins": 0.08906211704015732, "eval_rewards/rejected": -0.024962568655610085, "eval_runtime": 2320.838, "eval_samples": 2000, "eval_samples_per_second": 0.862, "eval_steps_per_second": 0.215, "train_loss": 0.6692726473427482, "train_runtime": 42824.2925, "train_samples": 6196, "train_samples_per_second": 0.434, "train_steps_per_second": 0.007 }