|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.986666666666667,
  "eval_steps": 500,
  "global_step": 168,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 3.8474488258361816,
      "learning_rate": 2.9411764705882355e-06,
      "logits/chosen": -2.332357883453369,
      "logits/rejected": -2.368460178375244,
      "logps/chosen": -1.2429828643798828,
      "logps/rejected": -1.659155249595642,
      "loss": 1.2935,
      "odds_ratio_loss": 11.457437515258789,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.12429828941822052,
      "rewards/margins": 0.04161724075675011,
      "rewards/rejected": -0.16591551899909973,
      "sft_loss": 0.14774402976036072,
      "step": 10
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 5.139791965484619,
      "learning_rate": 4.995131923687488e-06,
      "logits/chosen": -2.292804002761841,
      "logits/rejected": -2.327223300933838,
      "logps/chosen": -1.2883718013763428,
      "logps/rejected": -1.7239799499511719,
      "loss": 1.337,
      "odds_ratio_loss": 11.885274887084961,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.12883718311786652,
      "rewards/margins": 0.043560806661844254,
      "rewards/rejected": -0.17239800095558167,
      "sft_loss": 0.14851602911949158,
      "step": 20
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 5.0462751388549805,
      "learning_rate": 4.90911473983908e-06,
      "logits/chosen": -2.3472771644592285,
      "logits/rejected": -2.3845486640930176,
      "logps/chosen": -1.186187982559204,
      "logps/rejected": -1.59175705909729,
      "loss": 1.2375,
      "odds_ratio_loss": 10.995233535766602,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.11861880123615265,
      "rewards/margins": 0.04055692255496979,
      "rewards/rejected": -0.15917572379112244,
      "sft_loss": 0.13796505331993103,
      "step": 30
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 3.613401412963867,
      "learning_rate": 4.71919261421297e-06,
      "logits/chosen": -2.370697021484375,
      "logits/rejected": -2.4233319759368896,
      "logps/chosen": -0.9008461236953735,
      "logps/rejected": -1.4053981304168701,
      "loss": 0.9426,
      "odds_ratio_loss": 8.488945007324219,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.09008461236953735,
      "rewards/margins": 0.05045522004365921,
      "rewards/rejected": -0.14053983986377716,
      "sft_loss": 0.09369887411594391,
      "step": 40
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 3.07717227935791,
      "learning_rate": 4.43355687413747e-06,
      "logits/chosen": -2.4286742210388184,
      "logits/rejected": -2.4549202919006348,
      "logps/chosen": -0.6461768746376038,
      "logps/rejected": -1.0805357694625854,
      "loss": 0.6945,
      "odds_ratio_loss": 6.29970645904541,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.06461768597364426,
      "rewards/margins": 0.04343589395284653,
      "rewards/rejected": -0.10805357992649078,
      "sft_loss": 0.06456876546144485,
      "step": 50
    },
    {
      "epoch": 1.0666666666666667,
      "grad_norm": 2.27549409866333,
      "learning_rate": 4.064526968101844e-06,
      "logits/chosen": -2.317702531814575,
      "logits/rejected": -2.337463855743408,
      "logps/chosen": -0.5583322048187256,
      "logps/rejected": -1.0102574825286865,
      "loss": 0.6081,
      "odds_ratio_loss": 5.508663177490234,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.05583322048187256,
      "rewards/margins": 0.04519252851605415,
      "rewards/rejected": -0.10102574527263641,
      "sft_loss": 0.05719046667218208,
      "step": 60
    },
    {
      "epoch": 1.2444444444444445,
      "grad_norm": 2.98760724067688,
      "learning_rate": 3.6280191288478437e-06,
      "logits/chosen": -2.4034271240234375,
      "logits/rejected": -2.4294583797454834,
      "logps/chosen": -0.35430365800857544,
      "logps/rejected": -0.7878178358078003,
      "loss": 0.3968,
      "odds_ratio_loss": 3.5909438133239746,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.035430364310741425,
      "rewards/margins": 0.0433514229953289,
      "rewards/rejected": -0.07878179103136063,
      "sft_loss": 0.037743426859378815,
      "step": 70
    },
    {
      "epoch": 1.4222222222222223,
      "grad_norm": 2.200446605682373,
      "learning_rate": 3.142859907420615e-06,
      "logits/chosen": -2.336760997772217,
      "logits/rejected": -2.365061044692993,
      "logps/chosen": -0.2458254098892212,
      "logps/rejected": -0.6315831542015076,
      "loss": 0.2882,
      "odds_ratio_loss": 2.5942039489746094,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.02458254061639309,
      "rewards/margins": 0.03857577592134476,
      "rewards/rejected": -0.063158318400383,
      "sft_loss": 0.028748363256454468,
      "step": 80
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6536734104156494,
      "learning_rate": 2.629974185404951e-06,
      "logits/chosen": -2.313284397125244,
      "logits/rejected": -2.3488173484802246,
      "logps/chosen": -0.1942504495382309,
      "logps/rejected": -0.6369145512580872,
      "loss": 0.2406,
      "odds_ratio_loss": 2.1658334732055664,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.01942504197359085,
      "rewards/margins": 0.04426640644669533,
      "rewards/rejected": -0.06369145214557648,
      "sft_loss": 0.023997236043214798,
      "step": 90
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 1.2765487432479858,
      "learning_rate": 2.1114826863194882e-06,
      "logits/chosen": -2.2804689407348633,
      "logits/rejected": -2.3144404888153076,
      "logps/chosen": -0.13879844546318054,
      "logps/rejected": -0.6243221163749695,
      "loss": 0.1759,
      "odds_ratio_loss": 1.5965832471847534,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -0.013879844918847084,
      "rewards/margins": 0.048552367836236954,
      "rewards/rejected": -0.06243220716714859,
      "sft_loss": 0.01626196689903736,
      "step": 100
    },
    {
      "epoch": 1.9555555555555557,
      "grad_norm": 1.2211233377456665,
      "learning_rate": 1.6097479104361328e-06,
      "logits/chosen": -2.317593812942505,
      "logits/rejected": -2.336158037185669,
      "logps/chosen": -0.1410079300403595,
      "logps/rejected": -0.5155030488967896,
      "loss": 0.1836,
      "odds_ratio_loss": 1.6868371963500977,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.014100792817771435,
      "rewards/margins": 0.037449512630701065,
      "rewards/rejected": -0.051550306379795074,
      "sft_loss": 0.01492035947740078,
      "step": 110
    },
    {
      "epoch": 2.1333333333333333,
      "grad_norm": 1.3183128833770752,
      "learning_rate": 1.1464096417858821e-06,
      "logits/chosen": -2.290233850479126,
      "logits/rejected": -2.3088765144348145,
      "logps/chosen": -0.10447756201028824,
      "logps/rejected": -0.6317979097366333,
      "loss": 0.1326,
      "odds_ratio_loss": 1.1886184215545654,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.010447757318615913,
      "rewards/margins": 0.052732039242982864,
      "rewards/rejected": -0.06317979097366333,
      "sft_loss": 0.013712344691157341,
      "step": 120
    },
    {
      "epoch": 2.311111111111111,
      "grad_norm": 1.7163466215133667,
      "learning_rate": 7.414516258630245e-07,
      "logits/chosen": -2.262594223022461,
      "logits/rejected": -2.284545421600342,
      "logps/chosen": -0.12122899293899536,
      "logps/rejected": -0.5315740704536438,
      "loss": 0.1618,
      "odds_ratio_loss": 1.4604320526123047,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.012122899293899536,
      "rewards/margins": 0.0410345084965229,
      "rewards/rejected": -0.05315741151571274,
      "sft_loss": 0.015804503113031387,
      "step": 130
    },
    {
      "epoch": 2.488888888888889,
      "grad_norm": 3.0588433742523193,
      "learning_rate": 4.123396721497977e-07,
      "logits/chosen": -2.3231940269470215,
      "logits/rejected": -2.34096622467041,
      "logps/chosen": -0.12840591371059418,
      "logps/rejected": -0.5461179614067078,
      "loss": 0.1688,
      "odds_ratio_loss": 1.521639108657837,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.012840591371059418,
      "rewards/margins": 0.04177120327949524,
      "rewards/rejected": -0.05461179465055466,
      "sft_loss": 0.016592377796769142,
      "step": 140
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 2.731790065765381,
      "learning_rate": 1.7326835503629542e-07,
      "logits/chosen": -2.2994039058685303,
      "logits/rejected": -2.324022054672241,
      "logps/chosen": -0.12199757248163223,
      "logps/rejected": -0.5101950168609619,
      "loss": 0.1648,
      "odds_ratio_loss": 1.4926129579544067,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.012199757620692253,
      "rewards/margins": 0.03881974145770073,
      "rewards/rejected": -0.05101950094103813,
      "sft_loss": 0.015529977157711983,
      "step": 150
    },
    {
      "epoch": 2.8444444444444446,
      "grad_norm": 1.6209518909454346,
      "learning_rate": 3.4548802869627806e-08,
      "logits/chosen": -2.19964599609375,
      "logits/rejected": -2.222959041595459,
      "logps/chosen": -0.11084076017141342,
      "logps/rejected": -0.5317128300666809,
      "loss": 0.1456,
      "odds_ratio_loss": 1.3233401775360107,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.011084076017141342,
      "rewards/margins": 0.042087212204933167,
      "rewards/rejected": -0.05317128449678421,
      "sft_loss": 0.013311339542269707,
      "step": 160
    },
    {
      "epoch": 2.986666666666667,
      "step": 168,
      "total_flos": 4.410496642646016e+16,
      "train_loss": 0.4940077399923688,
      "train_runtime": 519.2548,
      "train_samples_per_second": 5.2,
      "train_steps_per_second": 0.324
    }
  ],
  "logging_steps": 10,
  "max_steps": 168,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.410496642646016e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|