{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 274, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18248175182481752, "grad_norm": 0.11348239332437515, "learning_rate": 9.803954791481238e-07, "logits/chosen": 0.7525291442871094, "logits/rejected": 0.862596869468689, "logps/chosen": -57.87046432495117, "logps/rejected": -91.25581359863281, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": 3.735088586807251, "rewards/margins": 5.059007167816162, "rewards/rejected": -1.3239188194274902, "step": 50 }, { "epoch": 0.36496350364963503, "grad_norm": 2.2142693996429443, "learning_rate": 8.03112705483319e-07, "logits/chosen": 0.7221750020980835, "logits/rejected": 0.8843123912811279, "logps/chosen": -54.99198913574219, "logps/rejected": -95.34280395507812, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": 3.793600559234619, "rewards/margins": 5.281071186065674, "rewards/rejected": -1.4874707460403442, "step": 100 }, { "epoch": 0.5474452554744526, "grad_norm": 1.848400354385376, "learning_rate": 5.063851773579869e-07, "logits/chosen": 0.7279171943664551, "logits/rejected": 0.8646959662437439, "logps/chosen": -55.23713684082031, "logps/rejected": -100.29174041748047, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": 3.7684295177459717, "rewards/margins": 5.4548797607421875, "rewards/rejected": -1.6864502429962158, "step": 150 }, { "epoch": 0.7299270072992701, "grad_norm": 0.9244675040245056, "learning_rate": 2.071415028359026e-07, "logits/chosen": 0.7067604064941406, "logits/rejected": 0.8388283252716064, "logps/chosen": -50.11094284057617, "logps/rejected": -98.2806625366211, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": 4.066249847412109, "rewards/margins": 5.748508930206299, "rewards/rejected": -1.6822593212127686, "step": 200 }, { "epoch": 0.9124087591240876, "grad_norm": 0.7083641886711121, "learning_rate": 2.3301803972534728e-08, "logits/chosen": 0.7070822715759277, "logits/rejected": 0.857603132724762, "logps/chosen": -51.985496520996094, "logps/rejected": -93.11666870117188, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": 3.9429595470428467, "rewards/margins": 5.623107433319092, "rewards/rejected": -1.6801483631134033, "step": 250 } ], "logging_steps": 50, "max_steps": 274, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }