{ "epoch": 2.0, "eval_logits/chosen": -3.11618709564209, "eval_logits/rejected": -3.1104962825775146, "eval_logps/chosen": -59.58960723876953, "eval_logps/rejected": -64.70088958740234, "eval_loss": 0.6901275515556335, "eval_rewards/accuracies": 0.589219331741333, "eval_rewards/chosen": -0.00877712108194828, "eval_rewards/margins": 0.006430591456592083, "eval_rewards/rejected": -0.015207710675895214, "eval_runtime": 383.5579, "eval_samples": 4304, "eval_samples_per_second": 11.221, "eval_steps_per_second": 1.403, "total_flos": 0.0, "train_loss": 0.6883086910911629, "train_runtime": 95005.2512, "train_samples": 92858, "train_samples_per_second": 1.955, "train_steps_per_second": 0.122 }