{ "epoch": 0.99, "eval_logits/chosen": -3.1902854442596436, "eval_logits/rejected": -3.0295298099517822, "eval_logps/chosen": -123.95388793945312, "eval_logps/rejected": -352.96429443359375, "eval_loss": 0.5983969569206238, "eval_rewards/accuracies": 0.84375, "eval_rewards/chosen": 0.025536777451634407, "eval_rewards/margins": 0.23942241072654724, "eval_rewards/rejected": -0.2138856202363968, "eval_runtime": 22.1643, "eval_samples": 241, "eval_samples_per_second": 10.873, "eval_steps_per_second": 0.361, "train_loss": 0.6001577688300092, "train_runtime": 543.7858, "train_samples": 2973, "train_samples_per_second": 5.467, "train_steps_per_second": 0.085 }