|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.27952480782669464, |
|
"eval_steps": 13, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005590496156533892, |
|
"grad_norm": 2.64302659034729, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5342, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005590496156533892, |
|
"eval_loss": 3.5435373783111572, |
|
"eval_runtime": 101.8278, |
|
"eval_samples_per_second": 11.834, |
|
"eval_steps_per_second": 2.966, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.011180992313067784, |
|
"grad_norm": 2.6918563842773438, |
|
"learning_rate": 0.0001, |
|
"loss": 3.4943, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.016771488469601678, |
|
"grad_norm": 2.663547992706299, |
|
"learning_rate": 9.989294616193017e-05, |
|
"loss": 3.4678, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02236198462613557, |
|
"grad_norm": 3.2312190532684326, |
|
"learning_rate": 9.957224306869053e-05, |
|
"loss": 3.2118, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.027952480782669462, |
|
"grad_norm": 2.7595348358154297, |
|
"learning_rate": 9.903926402016153e-05, |
|
"loss": 2.6675, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.033542976939203356, |
|
"grad_norm": 1.419068455696106, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 2.22, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.039133473095737246, |
|
"grad_norm": 0.7016229033470154, |
|
"learning_rate": 9.73465064747553e-05, |
|
"loss": 2.0622, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04472396925227114, |
|
"grad_norm": 0.6586772799491882, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 2.0141, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.050314465408805034, |
|
"grad_norm": 0.46751201152801514, |
|
"learning_rate": 9.484363707663442e-05, |
|
"loss": 1.9571, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.055904961565338925, |
|
"grad_norm": 0.7789000272750854, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 1.9574, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.061495457721872815, |
|
"grad_norm": 0.31843599677085876, |
|
"learning_rate": 9.157348061512727e-05, |
|
"loss": 1.9238, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06708595387840671, |
|
"grad_norm": 0.27857884764671326, |
|
"learning_rate": 8.966766701456177e-05, |
|
"loss": 1.915, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0726764500349406, |
|
"grad_norm": 0.43947434425354004, |
|
"learning_rate": 8.759199037394887e-05, |
|
"loss": 1.9201, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0726764500349406, |
|
"eval_loss": 1.9032824039459229, |
|
"eval_runtime": 101.9948, |
|
"eval_samples_per_second": 11.814, |
|
"eval_steps_per_second": 2.961, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07826694619147449, |
|
"grad_norm": 0.37218666076660156, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 1.9066, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08385744234800839, |
|
"grad_norm": 0.41769397258758545, |
|
"learning_rate": 8.296729075500344e-05, |
|
"loss": 1.8943, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08944793850454227, |
|
"grad_norm": 0.4944588541984558, |
|
"learning_rate": 8.043807145043604e-05, |
|
"loss": 1.8743, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09503843466107617, |
|
"grad_norm": 0.3327654004096985, |
|
"learning_rate": 7.777851165098012e-05, |
|
"loss": 1.8726, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.10062893081761007, |
|
"grad_norm": 0.4804844856262207, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.8517, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.10621942697414395, |
|
"grad_norm": 0.5033496022224426, |
|
"learning_rate": 7.211443451095007e-05, |
|
"loss": 1.8815, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.11180992313067785, |
|
"grad_norm": 0.4059678316116333, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 1.8633, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11740041928721175, |
|
"grad_norm": 0.5624929070472717, |
|
"learning_rate": 6.607197326515808e-05, |
|
"loss": 1.8528, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12299091544374563, |
|
"grad_norm": 0.30437928438186646, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 1.8548, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.12858141160027953, |
|
"grad_norm": 0.4757891297340393, |
|
"learning_rate": 5.9754516100806423e-05, |
|
"loss": 1.8662, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.13417190775681342, |
|
"grad_norm": 0.5576551556587219, |
|
"learning_rate": 5.6526309611002594e-05, |
|
"loss": 1.8525, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.13976240391334732, |
|
"grad_norm": 0.301616907119751, |
|
"learning_rate": 5.327015646150716e-05, |
|
"loss": 1.8448, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1453529000698812, |
|
"grad_norm": 0.3822285532951355, |
|
"learning_rate": 5e-05, |
|
"loss": 1.841, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1453529000698812, |
|
"eval_loss": 1.8403270244598389, |
|
"eval_runtime": 101.9955, |
|
"eval_samples_per_second": 11.814, |
|
"eval_steps_per_second": 2.961, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1509433962264151, |
|
"grad_norm": 0.4914432764053345, |
|
"learning_rate": 4.6729843538492847e-05, |
|
"loss": 1.8609, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15653389238294899, |
|
"grad_norm": 0.35133427381515503, |
|
"learning_rate": 4.347369038899744e-05, |
|
"loss": 1.8361, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.16212438853948288, |
|
"grad_norm": 0.3233548104763031, |
|
"learning_rate": 4.0245483899193595e-05, |
|
"loss": 1.8291, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.16771488469601678, |
|
"grad_norm": 0.4606425166130066, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 1.855, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17330538085255065, |
|
"grad_norm": 0.7441600561141968, |
|
"learning_rate": 3.392802673484193e-05, |
|
"loss": 1.8387, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.17889587700908455, |
|
"grad_norm": 0.3128848373889923, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 1.8219, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.18448637316561844, |
|
"grad_norm": 0.33925583958625793, |
|
"learning_rate": 2.7885565489049946e-05, |
|
"loss": 1.8055, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.19007686932215234, |
|
"grad_norm": 0.6340675950050354, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 1.8227, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.19566736547868624, |
|
"grad_norm": 0.7749468684196472, |
|
"learning_rate": 2.2221488349019903e-05, |
|
"loss": 1.8204, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.20125786163522014, |
|
"grad_norm": 0.3163313865661621, |
|
"learning_rate": 1.9561928549563968e-05, |
|
"loss": 1.7955, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.206848357791754, |
|
"grad_norm": 0.3138035833835602, |
|
"learning_rate": 1.703270924499656e-05, |
|
"loss": 1.8125, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2124388539482879, |
|
"grad_norm": 0.3796585500240326, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 1.7958, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2180293501048218, |
|
"grad_norm": 0.34551334381103516, |
|
"learning_rate": 1.2408009626051137e-05, |
|
"loss": 1.7706, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2180293501048218, |
|
"eval_loss": 1.7951258420944214, |
|
"eval_runtime": 101.982, |
|
"eval_samples_per_second": 11.816, |
|
"eval_steps_per_second": 2.961, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2236198462613557, |
|
"grad_norm": 0.5342937707901001, |
|
"learning_rate": 1.0332332985438248e-05, |
|
"loss": 1.8122, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2292103424178896, |
|
"grad_norm": 0.5158257484436035, |
|
"learning_rate": 8.426519384872733e-06, |
|
"loss": 1.7735, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2348008385744235, |
|
"grad_norm": 0.4467351734638214, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 1.794, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.24039133473095736, |
|
"grad_norm": 0.5328588485717773, |
|
"learning_rate": 5.156362923365588e-06, |
|
"loss": 1.7981, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.24598183088749126, |
|
"grad_norm": 0.30072852969169617, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 1.8019, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.25157232704402516, |
|
"grad_norm": 0.33151668310165405, |
|
"learning_rate": 2.653493525244721e-06, |
|
"loss": 1.7739, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.25716282320055905, |
|
"grad_norm": 0.5027835965156555, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 1.792, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.26275331935709295, |
|
"grad_norm": 0.2943630516529083, |
|
"learning_rate": 9.607359798384785e-07, |
|
"loss": 1.7813, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.26834381551362685, |
|
"grad_norm": 0.37901201844215393, |
|
"learning_rate": 4.277569313094809e-07, |
|
"loss": 1.8137, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.27393431167016075, |
|
"grad_norm": 0.6345136165618896, |
|
"learning_rate": 1.0705383806982606e-07, |
|
"loss": 1.8183, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.27952480782669464, |
|
"grad_norm": 0.42185863852500916, |
|
"learning_rate": 0.0, |
|
"loss": 1.7685, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0455434309664768e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|