{ "epoch": 1.0, "eval_logits/chosen": -2.244286060333252, "eval_logits/rejected": -2.2393975257873535, "eval_logps/chosen": -34.03020095825195, "eval_logps/rejected": -37.50043869018555, "eval_loss": 0.6983639597892761, "eval_rewards/accuracies": 0.4987541437149048, "eval_rewards/chosen": 0.0026094578206539154, "eval_rewards/margins": -0.007098525296896696, "eval_rewards/rejected": 0.00970798172056675, "eval_runtime": 145.7128, "eval_samples": 343, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.295, "train_loss": 0.6911570282725544, "train_runtime": 3257.344, "train_samples": 3079, "train_samples_per_second": 0.945, "train_steps_per_second": 0.118 }