{ "epoch": 1.0, "eval_logits/chosen": -2.806074619293213, "eval_logits/rejected": -2.803609848022461, "eval_logps/chosen": -31.247922897338867, "eval_logps/rejected": -34.73902893066406, "eval_loss": 1.6120386123657227, "eval_rewards/accuracies": 0.5689368844032288, "eval_rewards/chosen": 0.01381184346973896, "eval_rewards/margins": 0.03016132488846779, "eval_rewards/rejected": -0.016349485144019127, "eval_runtime": 112.7318, "eval_samples": 343, "eval_samples_per_second": 3.043, "eval_steps_per_second": 0.381, "train_loss": 1.7377146052075672, "train_runtime": 2719.6001, "train_samples": 3079, "train_samples_per_second": 1.132, "train_steps_per_second": 0.142 }