{ "epoch": 1.0, "eval_logits/chosen": -2.243980884552002, "eval_logits/rejected": -2.2390992641448975, "eval_logps/chosen": -34.03550720214844, "eval_logps/rejected": -37.50373458862305, "eval_loss": 1.0085219144821167, "eval_rewards/accuracies": 0.48172760009765625, "eval_rewards/chosen": -0.0005728387623094022, "eval_rewards/margins": -0.008305290713906288, "eval_rewards/rejected": 0.007732452359050512, "eval_runtime": 145.8784, "eval_samples": 343, "eval_samples_per_second": 2.351, "eval_steps_per_second": 0.295, "train_loss": 0.9932638019710393, "train_runtime": 3254.1177, "train_samples": 3079, "train_samples_per_second": 0.946, "train_steps_per_second": 0.118 }