{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.989247311827957, "eval_steps": 100, "global_step": 46, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 13.15213108549844, "learning_rate": 1e-07, "logits/chosen": -3.0397768020629883, "logits/rejected": -2.909663200378418, "logps/chosen": -130.4150848388672, "logps/rejected": -304.69915771484375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.22, "grad_norm": 12.612483031980211, "learning_rate": 4.818756127755237e-07, "logits/chosen": -3.014326810836792, "logits/rejected": -2.936898708343506, "logps/chosen": -144.7748565673828, "logps/rejected": -202.1986541748047, "loss": 0.6866, "rewards/accuracies": 0.6527777910232544, "rewards/chosen": 0.0080408351495862, "rewards/margins": 0.009001191705465317, "rewards/rejected": -0.0009603556245565414, "step": 10 }, { "epoch": 0.43, "grad_norm": 12.917969062898747, "learning_rate": 3.52267159292835e-07, "logits/chosen": -3.0234923362731934, "logits/rejected": -2.988145112991333, "logps/chosen": -146.7506103515625, "logps/rejected": -274.9992980957031, "loss": 0.636, "rewards/accuracies": 0.893750011920929, "rewards/chosen": 0.0961742028594017, "rewards/margins": 0.11899683624505997, "rewards/rejected": -0.022822635248303413, "step": 20 }, { "epoch": 0.65, "grad_norm": 13.065587999351779, "learning_rate": 1.6549578039787434e-07, "logits/chosen": -3.0107874870300293, "logits/rejected": -2.984440326690674, "logps/chosen": -151.49595642089844, "logps/rejected": -286.1717224121094, "loss": 0.5771, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.16383036971092224, "rewards/margins": 0.2549803853034973, "rewards/rejected": -0.09115001559257507, "step": 30 }, { "epoch": 0.86, "grad_norm": 13.06247410612875, "learning_rate": 2.5958610759736126e-08, "logits/chosen": -3.0013656616210938, "logits/rejected": -2.9986214637756348, "logps/chosen": -124.09024810791016, "logps/rejected": -292.10986328125, "loss": 0.5399, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.20066456496715546, "rewards/margins": 0.3728640079498291, "rewards/rejected": -0.17219944298267365, "step": 40 }, { "epoch": 0.99, "step": 46, "total_flos": 0.0, "train_loss": 0.6001577688300092, "train_runtime": 543.7858, "train_samples_per_second": 5.467, "train_steps_per_second": 0.085 } ], "logging_steps": 10, "max_steps": 46, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }