{ "epoch": 1.0, "eval_logits/chosen": -2.805530071258545, "eval_logits/rejected": -2.802974224090576, "eval_logps/chosen": -31.315858840942383, "eval_logps/rejected": -34.838993072509766, "eval_loss": 6.074533939361572, "eval_rewards/accuracies": 0.5660299062728882, "eval_rewards/chosen": -0.006680913269519806, "eval_rewards/margins": 0.021486198529601097, "eval_rewards/rejected": -0.0281671155244112, "eval_runtime": 112.8101, "eval_samples": 343, "eval_samples_per_second": 3.041, "eval_steps_per_second": 0.381 }