{ "epoch": 1.99, "eval_logits/chosen": -2.5726864337921143, "eval_logits/rejected": -2.556718587875366, "eval_logps/chosen": -85.11767578125, "eval_logps/rejected": -133.85464477539062, "eval_loss": 0.12992651760578156, "eval_rewards/accuracies": 0.9520547986030579, "eval_rewards/chosen": -2.199751138687134, "eval_rewards/margins": 4.689663887023926, "eval_rewards/rejected": -6.889414310455322, "eval_runtime": 288.3554, "eval_samples": 876, "eval_samples_per_second": 3.038, "eval_steps_per_second": 3.038, "train_loss": 0.20939183043777396, "train_runtime": 5162.9911, "train_samples": 3505, "train_samples_per_second": 1.358, "train_steps_per_second": 0.042 }