{ "epoch": 1.0, "eval_logits/chosen": 98.98737335205078, "eval_logits/rejected": 98.98121643066406, "eval_logps/chosen": -32.38493728637695, "eval_logps/rejected": -35.900760650634766, "eval_loss": 1.7667114734649658, "eval_rewards/accuracies": 0.48795682191848755, "eval_rewards/chosen": 0.04659241810441017, "eval_rewards/margins": -0.006021121051162481, "eval_rewards/rejected": 0.05261354520916939, "eval_runtime": 103.7641, "eval_samples": 343, "eval_samples_per_second": 3.306, "eval_steps_per_second": 0.414, "train_loss": 2.2952296467570514, "train_runtime": 2556.4368, "train_samples": 3079, "train_samples_per_second": 1.204, "train_steps_per_second": 0.151 }