{ "epoch": 4.0, "eval_logits/chosen": -2.8337740898132324, "eval_logits/rejected": -2.830592393875122, "eval_logps/chosen": -31.703718185424805, "eval_logps/rejected": -35.44328689575195, "eval_loss": 0.4786534011363983, "eval_rewards/accuracies": 0.560215950012207, "eval_rewards/chosen": -0.2527596652507782, "eval_rewards/margins": 0.1943179965019226, "eval_rewards/rejected": -0.4470776319503784, "eval_runtime": 112.8079, "eval_samples": 343, "eval_samples_per_second": 3.041, "eval_steps_per_second": 0.381 }