{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.986666666666667, "eval_steps": 500, "global_step": 168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17777777777777778, "grad_norm": 3.8474488258361816, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -2.332357883453369, "logits/rejected": -2.368460178375244, "logps/chosen": -1.2429828643798828, "logps/rejected": -1.659155249595642, "loss": 1.2935, "odds_ratio_loss": 11.457437515258789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12429828941822052, "rewards/margins": 0.04161724075675011, "rewards/rejected": -0.16591551899909973, "sft_loss": 0.14774402976036072, "step": 10 }, { "epoch": 0.35555555555555557, "grad_norm": 5.139791965484619, "learning_rate": 4.995131923687488e-06, "logits/chosen": -2.292804002761841, "logits/rejected": -2.327223300933838, "logps/chosen": -1.2883718013763428, "logps/rejected": -1.7239799499511719, "loss": 1.337, "odds_ratio_loss": 11.885274887084961, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.12883718311786652, "rewards/margins": 0.043560806661844254, "rewards/rejected": -0.17239800095558167, "sft_loss": 0.14851602911949158, "step": 20 }, { "epoch": 0.5333333333333333, "grad_norm": 5.0462751388549805, "learning_rate": 4.90911473983908e-06, "logits/chosen": -2.3472771644592285, "logits/rejected": -2.3845486640930176, "logps/chosen": -1.186187982559204, "logps/rejected": -1.59175705909729, "loss": 1.2375, "odds_ratio_loss": 10.995233535766602, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.11861880123615265, "rewards/margins": 0.04055692255496979, "rewards/rejected": -0.15917572379112244, "sft_loss": 0.13796505331993103, "step": 30 }, { "epoch": 0.7111111111111111, "grad_norm": 3.613401412963867, "learning_rate": 4.71919261421297e-06, "logits/chosen": -2.370697021484375, "logits/rejected": -2.4233319759368896, "logps/chosen": -0.9008461236953735, "logps/rejected": -1.4053981304168701, "loss": 0.9426, "odds_ratio_loss": 8.488945007324219, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.09008461236953735, "rewards/margins": 0.05045522004365921, "rewards/rejected": -0.14053983986377716, "sft_loss": 0.09369887411594391, "step": 40 }, { "epoch": 0.8888888888888888, "grad_norm": 3.07717227935791, "learning_rate": 4.43355687413747e-06, "logits/chosen": -2.4286742210388184, "logits/rejected": -2.4549202919006348, "logps/chosen": -0.6461768746376038, "logps/rejected": -1.0805357694625854, "loss": 0.6945, "odds_ratio_loss": 6.29970645904541, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06461768597364426, "rewards/margins": 0.04343589395284653, "rewards/rejected": -0.10805357992649078, "sft_loss": 0.06456876546144485, "step": 50 }, { "epoch": 1.0666666666666667, "grad_norm": 2.27549409866333, "learning_rate": 4.064526968101844e-06, "logits/chosen": -2.317702531814575, "logits/rejected": -2.337463855743408, "logps/chosen": -0.5583322048187256, "logps/rejected": -1.0102574825286865, "loss": 0.6081, "odds_ratio_loss": 5.508663177490234, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.05583322048187256, "rewards/margins": 0.04519252851605415, "rewards/rejected": -0.10102574527263641, "sft_loss": 0.05719046667218208, "step": 60 }, { "epoch": 1.2444444444444445, "grad_norm": 2.98760724067688, "learning_rate": 3.6280191288478437e-06, "logits/chosen": -2.4034271240234375, "logits/rejected": -2.4294583797454834, "logps/chosen": -0.35430365800857544, "logps/rejected": -0.7878178358078003, "loss": 0.3968, "odds_ratio_loss": 3.5909438133239746, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.035430364310741425, "rewards/margins": 0.0433514229953289, "rewards/rejected": -0.07878179103136063, "sft_loss": 0.037743426859378815, "step": 70 }, { "epoch": 1.4222222222222223, "grad_norm": 2.200446605682373, "learning_rate": 3.142859907420615e-06, "logits/chosen": -2.336760997772217, "logits/rejected": -2.365061044692993, "logps/chosen": -0.2458254098892212, "logps/rejected": -0.6315831542015076, "loss": 0.2882, "odds_ratio_loss": 2.5942039489746094, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.02458254061639309, "rewards/margins": 0.03857577592134476, "rewards/rejected": -0.063158318400383, "sft_loss": 0.028748363256454468, "step": 80 }, { "epoch": 1.6, "grad_norm": 1.6536734104156494, "learning_rate": 2.629974185404951e-06, "logits/chosen": -2.313284397125244, "logits/rejected": -2.3488173484802246, "logps/chosen": -0.1942504495382309, "logps/rejected": -0.6369145512580872, "loss": 0.2406, "odds_ratio_loss": 2.1658334732055664, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.01942504197359085, "rewards/margins": 0.04426640644669533, "rewards/rejected": -0.06369145214557648, "sft_loss": 0.023997236043214798, "step": 90 }, { "epoch": 1.7777777777777777, "grad_norm": 1.2765487432479858, "learning_rate": 2.1114826863194882e-06, "logits/chosen": -2.2804689407348633, "logits/rejected": -2.3144404888153076, "logps/chosen": -0.13879844546318054, "logps/rejected": -0.6243221163749695, "loss": 0.1759, "odds_ratio_loss": 1.5965832471847534, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.013879844918847084, "rewards/margins": 0.048552367836236954, "rewards/rejected": -0.06243220716714859, "sft_loss": 0.01626196689903736, "step": 100 }, { "epoch": 1.9555555555555557, "grad_norm": 1.2211233377456665, "learning_rate": 1.6097479104361328e-06, "logits/chosen": -2.317593812942505, "logits/rejected": -2.336158037185669, "logps/chosen": -0.1410079300403595, "logps/rejected": -0.5155030488967896, "loss": 0.1836, "odds_ratio_loss": 1.6868371963500977, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.014100792817771435, "rewards/margins": 0.037449512630701065, "rewards/rejected": -0.051550306379795074, "sft_loss": 0.01492035947740078, "step": 110 }, { "epoch": 2.1333333333333333, "grad_norm": 1.3183128833770752, "learning_rate": 1.1464096417858821e-06, "logits/chosen": -2.290233850479126, "logits/rejected": -2.3088765144348145, "logps/chosen": -0.10447756201028824, "logps/rejected": -0.6317979097366333, "loss": 0.1326, "odds_ratio_loss": 1.1886184215545654, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.010447757318615913, "rewards/margins": 0.052732039242982864, "rewards/rejected": -0.06317979097366333, "sft_loss": 0.013712344691157341, "step": 120 }, { "epoch": 2.311111111111111, "grad_norm": 1.7163466215133667, "learning_rate": 7.414516258630245e-07, "logits/chosen": -2.262594223022461, "logits/rejected": -2.284545421600342, "logps/chosen": -0.12122899293899536, "logps/rejected": -0.5315740704536438, "loss": 0.1618, "odds_ratio_loss": 1.4604320526123047, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.012122899293899536, "rewards/margins": 0.0410345084965229, "rewards/rejected": -0.05315741151571274, "sft_loss": 0.015804503113031387, "step": 130 }, { "epoch": 2.488888888888889, "grad_norm": 3.0588433742523193, "learning_rate": 4.123396721497977e-07, "logits/chosen": -2.3231940269470215, "logits/rejected": -2.34096622467041, "logps/chosen": -0.12840591371059418, "logps/rejected": -0.5461179614067078, "loss": 0.1688, "odds_ratio_loss": 1.521639108657837, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.012840591371059418, "rewards/margins": 0.04177120327949524, "rewards/rejected": -0.05461179465055466, "sft_loss": 0.016592377796769142, "step": 140 }, { "epoch": 2.6666666666666665, "grad_norm": 2.731790065765381, "learning_rate": 1.7326835503629542e-07, "logits/chosen": -2.2994039058685303, "logits/rejected": -2.324022054672241, "logps/chosen": -0.12199757248163223, "logps/rejected": -0.5101950168609619, "loss": 0.1648, "odds_ratio_loss": 1.4926129579544067, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.012199757620692253, "rewards/margins": 0.03881974145770073, "rewards/rejected": -0.05101950094103813, "sft_loss": 0.015529977157711983, "step": 150 }, { "epoch": 2.8444444444444446, "grad_norm": 1.6209518909454346, "learning_rate": 3.4548802869627806e-08, "logits/chosen": -2.19964599609375, "logits/rejected": -2.222959041595459, "logps/chosen": -0.11084076017141342, "logps/rejected": -0.5317128300666809, "loss": 0.1456, "odds_ratio_loss": 1.3233401775360107, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.011084076017141342, "rewards/margins": 0.042087212204933167, "rewards/rejected": -0.05317128449678421, "sft_loss": 0.013311339542269707, "step": 160 }, { "epoch": 2.986666666666667, "step": 168, "total_flos": 4.410496642646016e+16, "train_loss": 0.4940077399923688, "train_runtime": 519.2548, "train_samples_per_second": 5.2, "train_steps_per_second": 0.324 } ], "logging_steps": 10, "max_steps": 168, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.410496642646016e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }