|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.971563981042654, |
|
"eval_steps": 500, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018957345971563982, |
|
"grad_norm": 135.46242416026297, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": 203.78909301757812, |
|
"logits/rejected": 182.07696533203125, |
|
"logps/chosen": -434.955322265625, |
|
"logps/rejected": -429.50384521484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 153.21007203142685, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": 172.51792907714844, |
|
"logits/rejected": 181.91905212402344, |
|
"logps/chosen": -379.5159912109375, |
|
"logps/rejected": -448.1414794921875, |
|
"loss": 0.7124, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.027371780946850777, |
|
"rewards/margins": 0.008838895708322525, |
|
"rewards/rejected": 0.01853288896381855, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 142.08069477633126, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": 173.6682891845703, |
|
"logits/rejected": 175.75106811523438, |
|
"logps/chosen": -381.7967834472656, |
|
"logps/rejected": -430.2057189941406, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.19567528367042542, |
|
"rewards/margins": 0.36299628019332886, |
|
"rewards/rejected": -0.16732101142406464, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 120.91669313906786, |
|
"learning_rate": 4.5025027361734613e-07, |
|
"logits/chosen": 164.82431030273438, |
|
"logits/rejected": 173.40679931640625, |
|
"logps/chosen": -365.9583740234375, |
|
"logps/rejected": -445.6947326660156, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0274367332458496, |
|
"rewards/margins": 1.1871185302734375, |
|
"rewards/rejected": -2.214555263519287, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 99.23172159300925, |
|
"learning_rate": 3.893311157806091e-07, |
|
"logits/chosen": 170.08607482910156, |
|
"logits/rejected": 162.03958129882812, |
|
"logps/chosen": -414.80340576171875, |
|
"logps/rejected": -454.820556640625, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.624403715133667, |
|
"rewards/margins": 0.9876155853271484, |
|
"rewards/rejected": -3.6120193004608154, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 107.79387973262907, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 177.14224243164062, |
|
"logits/rejected": 174.7544708251953, |
|
"logps/chosen": -459.03759765625, |
|
"logps/rejected": -512.6439208984375, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.4753570556640625, |
|
"rewards/margins": 1.3291194438934326, |
|
"rewards/rejected": -4.804476737976074, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 53.02406081573369, |
|
"learning_rate": 2.2891223348923882e-07, |
|
"logits/chosen": 167.0361328125, |
|
"logits/rejected": 171.70101928710938, |
|
"logps/chosen": -414.3773498535156, |
|
"logps/rejected": -524.4793701171875, |
|
"loss": 0.3097, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.200485944747925, |
|
"rewards/margins": 2.440056085586548, |
|
"rewards/rejected": -5.6405415534973145, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 62.10149124492704, |
|
"learning_rate": 1.4754491880085317e-07, |
|
"logits/chosen": 157.87100219726562, |
|
"logits/rejected": 165.49331665039062, |
|
"logps/chosen": -453.851318359375, |
|
"logps/rejected": -509.77960205078125, |
|
"loss": 0.2085, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -3.4425768852233887, |
|
"rewards/margins": 2.912635326385498, |
|
"rewards/rejected": -6.355212211608887, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 92.48557214337542, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 155.17611694335938, |
|
"logits/rejected": 170.6410369873047, |
|
"logps/chosen": -473.2515563964844, |
|
"logps/rejected": -549.7650146484375, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.7577788829803467, |
|
"rewards/margins": 2.7696285247802734, |
|
"rewards/rejected": -6.527407646179199, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 50.507688635772325, |
|
"learning_rate": 2.7440387297912122e-08, |
|
"logits/chosen": 160.5852508544922, |
|
"logits/rejected": 169.7069854736328, |
|
"logps/chosen": -444.99725341796875, |
|
"logps/rejected": -585.0924072265625, |
|
"loss": 0.1675, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.7263991832733154, |
|
"rewards/margins": 3.2523887157440186, |
|
"rewards/rejected": -6.97878885269165, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 44.901315947621455, |
|
"learning_rate": 2.27878296044029e-09, |
|
"logits/chosen": 160.45559692382812, |
|
"logits/rejected": 161.6788330078125, |
|
"logps/chosen": -444.6620178222656, |
|
"logps/rejected": -523.9886474609375, |
|
"loss": 0.1723, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -3.935753583908081, |
|
"rewards/margins": 3.035583734512329, |
|
"rewards/rejected": -6.971337795257568, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.971563981042654, |
|
"step": 104, |
|
"total_flos": 0.0, |
|
"train_loss": 0.39618923515081406, |
|
"train_runtime": 6217.6844, |
|
"train_samples_per_second": 2.171, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 104, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|