{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9947643979057592,
  "eval_steps": 100,
  "global_step": 95,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010471204188481676,
      "grad_norm": 14.42849432669548,
      "learning_rate": 2e-08,
      "logits/chosen": -2.705627918243408,
      "logits/rejected": -1.8209420442581177,
      "logps/chosen": -315.2232666015625,
      "logps/rejected": -333.2189025878906,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 13.44691822523609,
      "learning_rate": 2e-07,
      "logits/chosen": -2.7048144340515137,
      "logits/rejected": -2.1536295413970947,
      "logps/chosen": -277.1604309082031,
      "logps/rejected": -290.7293701171875,
      "loss": 0.6923,
      "rewards/accuracies": 0.5347222089767456,
      "rewards/chosen": 0.00019832928956020623,
      "rewards/margins": 0.0016432523261755705,
      "rewards/rejected": -0.0014449231093749404,
      "step": 10
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 16.21569144475015,
      "learning_rate": 1.9324722294043556e-07,
      "logits/chosen": -2.492572546005249,
      "logits/rejected": -2.0814006328582764,
      "logps/chosen": -309.6625061035156,
      "logps/rejected": -296.83868408203125,
      "loss": 0.6658,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -0.003994358237832785,
      "rewards/margins": 0.057490717619657516,
      "rewards/rejected": -0.061485081911087036,
      "step": 20
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 14.182589244162711,
      "learning_rate": 1.739008917220659e-07,
      "logits/chosen": -2.387019395828247,
      "logits/rejected": -1.9367955923080444,
      "logps/chosen": -299.33404541015625,
      "logps/rejected": -322.9083251953125,
      "loss": 0.5937,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.022330567240715027,
      "rewards/margins": 0.21069249510765076,
      "rewards/rejected": -0.23302307724952698,
      "step": 30
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 16.048018340744715,
      "learning_rate": 1.4457383557765383e-07,
      "logits/chosen": -2.4496796131134033,
      "logits/rejected": -2.079987049102783,
      "logps/chosen": -294.8586730957031,
      "logps/rejected": -345.1488037109375,
      "loss": 0.5035,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -0.1000712662935257,
      "rewards/margins": 0.45323365926742554,
      "rewards/rejected": -0.55330491065979,
      "step": 40
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 22.958491853819712,
      "learning_rate": 1.092268359463302e-07,
      "logits/chosen": -2.2879467010498047,
      "logits/rejected": -1.8292083740234375,
      "logps/chosen": -306.67706298828125,
      "logps/rejected": -418.6455993652344,
      "loss": 0.4123,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -0.4231874942779541,
      "rewards/margins": 0.8138816952705383,
      "rewards/rejected": -1.2370691299438477,
      "step": 50
    },
    {
      "epoch": 0.6282722513089005,
      "grad_norm": 19.370046611297532,
      "learning_rate": 7.263370099279171e-08,
      "logits/chosen": -2.491612672805786,
      "logits/rejected": -2.067852735519409,
      "logps/chosen": -442.7276916503906,
      "logps/rejected": -641.3619384765625,
      "loss": 0.2439,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -1.5304675102233887,
      "rewards/margins": 1.8317034244537354,
      "rewards/rejected": -3.362171173095703,
      "step": 60
    },
    {
      "epoch": 0.7329842931937173,
      "grad_norm": 17.60064306931795,
      "learning_rate": 3.973653636207437e-08,
      "logits/chosen": -2.3535571098327637,
      "logits/rejected": -1.9946672916412354,
      "logps/chosen": -591.0353393554688,
      "logps/rejected": -910.4153442382812,
      "loss": 0.1698,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -2.834195613861084,
      "rewards/margins": 3.2335205078125,
      "rewards/rejected": -6.067716598510742,
      "step": 70
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 20.39945478972367,
      "learning_rate": 1.49782864270386e-08,
      "logits/chosen": -2.500075101852417,
      "logits/rejected": -2.16581654548645,
      "logps/chosen": -597.4744262695312,
      "logps/rejected": -964.6823120117188,
      "loss": 0.1431,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": -3.152522563934326,
      "rewards/margins": 3.5699126720428467,
      "rewards/rejected": -6.722434997558594,
      "step": 80
    },
    {
      "epoch": 0.9424083769633508,
      "grad_norm": 27.851116517075948,
      "learning_rate": 1.7026900316098214e-09,
      "logits/chosen": -2.4192073345184326,
      "logits/rejected": -2.111260175704956,
      "logps/chosen": -618.6544799804688,
      "logps/rejected": -911.25146484375,
      "loss": 0.137,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -2.717777967453003,
      "rewards/margins": 3.354006290435791,
      "rewards/rejected": -6.071784019470215,
      "step": 90
    },
    {
      "epoch": 0.9947643979057592,
      "step": 95,
      "total_flos": 0.0,
      "train_loss": 0.38092811358602424,
      "train_runtime": 1269.7396,
      "train_samples_per_second": 9.621,
      "train_steps_per_second": 0.075
    }
  ],
  "logging_steps": 10,
  "max_steps": 95,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}