|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.989247311827957,
  "eval_steps": 100,
  "global_step": 46,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 13.15213108549844,
      "learning_rate": 1e-07,
      "logits/chosen": -3.0397768020629883,
      "logits/rejected": -2.909663200378418,
      "logps/chosen": -130.4150848388672,
      "logps/rejected": -304.69915771484375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.22,
      "grad_norm": 12.612483031980211,
      "learning_rate": 4.818756127755237e-07,
      "logits/chosen": -3.014326810836792,
      "logits/rejected": -2.936898708343506,
      "logps/chosen": -144.7748565673828,
      "logps/rejected": -202.1986541748047,
      "loss": 0.6866,
      "rewards/accuracies": 0.6527777910232544,
      "rewards/chosen": 0.0080408351495862,
      "rewards/margins": 0.009001191705465317,
      "rewards/rejected": -0.0009603556245565414,
      "step": 10
    },
    {
      "epoch": 0.43,
      "grad_norm": 12.917969062898747,
      "learning_rate": 3.52267159292835e-07,
      "logits/chosen": -3.0234923362731934,
      "logits/rejected": -2.988145112991333,
      "logps/chosen": -146.7506103515625,
      "logps/rejected": -274.9992980957031,
      "loss": 0.636,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 0.0961742028594017,
      "rewards/margins": 0.11899683624505997,
      "rewards/rejected": -0.022822635248303413,
      "step": 20
    },
    {
      "epoch": 0.65,
      "grad_norm": 13.065587999351779,
      "learning_rate": 1.6549578039787434e-07,
      "logits/chosen": -3.0107874870300293,
      "logits/rejected": -2.984440326690674,
      "logps/chosen": -151.49595642089844,
      "logps/rejected": -286.1717224121094,
      "loss": 0.5771,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 0.16383036971092224,
      "rewards/margins": 0.2549803853034973,
      "rewards/rejected": -0.09115001559257507,
      "step": 30
    },
    {
      "epoch": 0.86,
      "grad_norm": 13.06247410612875,
      "learning_rate": 2.5958610759736126e-08,
      "logits/chosen": -3.0013656616210938,
      "logits/rejected": -2.9986214637756348,
      "logps/chosen": -124.09024810791016,
      "logps/rejected": -292.10986328125,
      "loss": 0.5399,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 0.20066456496715546,
      "rewards/margins": 0.3728640079498291,
      "rewards/rejected": -0.17219944298267365,
      "step": 40
    },
    {
      "epoch": 0.99,
      "step": 46,
      "total_flos": 0.0,
      "train_loss": 0.6001577688300092,
      "train_runtime": 543.7858,
      "train_samples_per_second": 5.467,
      "train_steps_per_second": 0.085
    }
  ],
  "logging_steps": 10,
  "max_steps": 46,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|