lambda-llama-3-8b-dpo-test-orca / trainer_state.json
tanliboy's picture
Model save
be52e11 verified
raw
history blame
6.21 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9947643979057592,
"eval_steps": 100,
"global_step": 95,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010471204188481676,
"grad_norm": 14.42849432669548,
"learning_rate": 2e-08,
"logits/chosen": -2.705627918243408,
"logits/rejected": -1.8209420442581177,
"logps/chosen": -315.2232666015625,
"logps/rejected": -333.2189025878906,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.10471204188481675,
"grad_norm": 13.44691822523609,
"learning_rate": 2e-07,
"logits/chosen": -2.7048144340515137,
"logits/rejected": -2.1536295413970947,
"logps/chosen": -277.1604309082031,
"logps/rejected": -290.7293701171875,
"loss": 0.6923,
"rewards/accuracies": 0.5347222089767456,
"rewards/chosen": 0.00019832928956020623,
"rewards/margins": 0.0016432523261755705,
"rewards/rejected": -0.0014449231093749404,
"step": 10
},
{
"epoch": 0.2094240837696335,
"grad_norm": 16.21569144475015,
"learning_rate": 1.9324722294043556e-07,
"logits/chosen": -2.492572546005249,
"logits/rejected": -2.0814006328582764,
"logps/chosen": -309.6625061035156,
"logps/rejected": -296.83868408203125,
"loss": 0.6658,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.003994358237832785,
"rewards/margins": 0.057490717619657516,
"rewards/rejected": -0.061485081911087036,
"step": 20
},
{
"epoch": 0.31413612565445026,
"grad_norm": 14.182589244162711,
"learning_rate": 1.739008917220659e-07,
"logits/chosen": -2.387019395828247,
"logits/rejected": -1.9367955923080444,
"logps/chosen": -299.33404541015625,
"logps/rejected": -322.9083251953125,
"loss": 0.5937,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -0.022330567240715027,
"rewards/margins": 0.21069249510765076,
"rewards/rejected": -0.23302307724952698,
"step": 30
},
{
"epoch": 0.418848167539267,
"grad_norm": 16.048018340744715,
"learning_rate": 1.4457383557765383e-07,
"logits/chosen": -2.4496796131134033,
"logits/rejected": -2.079987049102783,
"logps/chosen": -294.8586730957031,
"logps/rejected": -345.1488037109375,
"loss": 0.5035,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.1000712662935257,
"rewards/margins": 0.45323365926742554,
"rewards/rejected": -0.55330491065979,
"step": 40
},
{
"epoch": 0.5235602094240838,
"grad_norm": 22.958491853819712,
"learning_rate": 1.092268359463302e-07,
"logits/chosen": -2.2879467010498047,
"logits/rejected": -1.8292083740234375,
"logps/chosen": -306.67706298828125,
"logps/rejected": -418.6455993652344,
"loss": 0.4123,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.4231874942779541,
"rewards/margins": 0.8138816952705383,
"rewards/rejected": -1.2370691299438477,
"step": 50
},
{
"epoch": 0.6282722513089005,
"grad_norm": 19.370046611297532,
"learning_rate": 7.263370099279171e-08,
"logits/chosen": -2.491612672805786,
"logits/rejected": -2.067852735519409,
"logps/chosen": -442.7276916503906,
"logps/rejected": -641.3619384765625,
"loss": 0.2439,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.5304675102233887,
"rewards/margins": 1.8317034244537354,
"rewards/rejected": -3.362171173095703,
"step": 60
},
{
"epoch": 0.7329842931937173,
"grad_norm": 17.60064306931795,
"learning_rate": 3.973653636207437e-08,
"logits/chosen": -2.3535571098327637,
"logits/rejected": -1.9946672916412354,
"logps/chosen": -591.0353393554688,
"logps/rejected": -910.4153442382812,
"loss": 0.1698,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -2.834195613861084,
"rewards/margins": 3.2335205078125,
"rewards/rejected": -6.067716598510742,
"step": 70
},
{
"epoch": 0.837696335078534,
"grad_norm": 20.39945478972367,
"learning_rate": 1.49782864270386e-08,
"logits/chosen": -2.500075101852417,
"logits/rejected": -2.16581654548645,
"logps/chosen": -597.4744262695312,
"logps/rejected": -964.6823120117188,
"loss": 0.1431,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -3.152522563934326,
"rewards/margins": 3.5699126720428467,
"rewards/rejected": -6.722434997558594,
"step": 80
},
{
"epoch": 0.9424083769633508,
"grad_norm": 27.851116517075948,
"learning_rate": 1.7026900316098214e-09,
"logits/chosen": -2.4192073345184326,
"logits/rejected": -2.111260175704956,
"logps/chosen": -618.6544799804688,
"logps/rejected": -911.25146484375,
"loss": 0.137,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -2.717777967453003,
"rewards/margins": 3.354006290435791,
"rewards/rejected": -6.071784019470215,
"step": 90
},
{
"epoch": 0.9947643979057592,
"step": 95,
"total_flos": 0.0,
"train_loss": 0.38092811358602424,
"train_runtime": 1269.7396,
"train_samples_per_second": 9.621,
"train_steps_per_second": 0.075
}
],
"logging_steps": 10,
"max_steps": 95,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}