|
{ |
|
  "best_metric": null,
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.6257332811888933, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012514665623777864, |
|
"grad_norm": 3.003929615020752, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7707, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012514665623777864, |
|
      "eval_loss": null,
|
"eval_runtime": 2.0475, |
|
"eval_samples_per_second": 24.42, |
|
"eval_steps_per_second": 6.349, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.025029331247555728, |
|
"grad_norm": 2.6678109169006348, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3159, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03754399687133359, |
|
"grad_norm": 2.0421063899993896, |
|
"learning_rate": 9.990365154573717e-05, |
|
"loss": 2.5062, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.050058662495111456, |
|
"grad_norm": 1.8860653638839722, |
|
"learning_rate": 9.961501876182148e-05, |
|
"loss": 2.0819, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06257332811888933, |
|
"grad_norm": 2.093566656112671, |
|
"learning_rate": 9.913533761814537e-05, |
|
"loss": 2.1698, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07508799374266718, |
|
"grad_norm": 2.6375949382781982, |
|
"learning_rate": 9.846666218300807e-05, |
|
"loss": 2.1448, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.08760265936644505, |
|
"grad_norm": 2.7502684593200684, |
|
"learning_rate": 9.761185582727977e-05, |
|
"loss": 2.2762, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.10011732499022291, |
|
"grad_norm": 5.241250514984131, |
|
"learning_rate": 9.657457896300791e-05, |
|
"loss": 2.5179, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.11263199061400078, |
|
"grad_norm": 3.217245101928711, |
|
"learning_rate": 9.535927336897098e-05, |
|
"loss": 2.0864, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.12514665623777865, |
|
"grad_norm": 2.6275532245635986, |
|
"learning_rate": 9.397114317029975e-05, |
|
"loss": 1.8957, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13766132186155652, |
|
"grad_norm": 4.345211505889893, |
|
"learning_rate": 9.241613255361455e-05, |
|
"loss": 2.3292, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.15017598748533437, |
|
"grad_norm": 2.715369462966919, |
|
"learning_rate": 9.070090031310558e-05, |
|
"loss": 2.1771, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.16269065310911224, |
|
"grad_norm": 1.941023826599121, |
|
"learning_rate": 8.883279133655399e-05, |
|
"loss": 1.3988, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1752053187328901, |
|
"grad_norm": 2.244020462036133, |
|
"learning_rate": 8.681980515339464e-05, |
|
"loss": 1.4134, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.18771998435666798, |
|
"grad_norm": 2.06498122215271, |
|
"learning_rate": 8.467056167950311e-05, |
|
"loss": 1.9165, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.20023464998044582, |
|
"grad_norm": 1.6291917562484741, |
|
"learning_rate": 8.239426430539243e-05, |
|
"loss": 1.523, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2127493156042237, |
|
"grad_norm": 1.4038172960281372, |
|
"learning_rate": 8.000066048588211e-05, |
|
"loss": 1.5351, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.22526398122800156, |
|
"grad_norm": 1.607500672340393, |
|
"learning_rate": 7.75e-05, |
|
"loss": 1.4412, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.23777864685177943, |
|
"grad_norm": 1.7136542797088623, |
|
"learning_rate": 7.490299105985507e-05, |
|
"loss": 1.6684, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.2502933124755573, |
|
"grad_norm": 1.6591033935546875, |
|
"learning_rate": 7.222075445642904e-05, |
|
"loss": 1.6842, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26280797809933515, |
|
"grad_norm": 2.4633328914642334, |
|
"learning_rate": 6.946477593864228e-05, |
|
"loss": 1.2249, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.27532264372311305, |
|
"grad_norm": 2.102135419845581, |
|
"learning_rate": 6.664685702961344e-05, |
|
"loss": 1.4803, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.2878373093468909, |
|
"grad_norm": 2.2035129070281982, |
|
"learning_rate": 6.377906449072578e-05, |
|
"loss": 1.4066, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.30035197497066873, |
|
"grad_norm": 2.510206937789917, |
|
"learning_rate": 6.087367864990233e-05, |
|
"loss": 1.4071, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.31286664059444663, |
|
"grad_norm": 3.517763376235962, |
|
"learning_rate": 5.794314081535644e-05, |
|
"loss": 1.8974, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.31286664059444663, |
|
      "eval_loss": null,
|
"eval_runtime": 2.0345, |
|
"eval_samples_per_second": 24.576, |
|
"eval_steps_per_second": 6.39, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3253813062182245, |
|
"grad_norm": 2.0011110305786133, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 0.991, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3378959718420024, |
|
"grad_norm": 2.0475192070007324, |
|
"learning_rate": 5.205685918464356e-05, |
|
"loss": 1.3608, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.3504106374657802, |
|
"grad_norm": 1.8558727502822876, |
|
"learning_rate": 4.912632135009769e-05, |
|
"loss": 1.5395, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.36292530308955806, |
|
"grad_norm": 1.5381715297698975, |
|
"learning_rate": 4.6220935509274235e-05, |
|
"loss": 1.183, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.37543996871333596, |
|
"grad_norm": 1.9737073183059692, |
|
"learning_rate": 4.3353142970386564e-05, |
|
"loss": 1.2611, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3879546343371138, |
|
"grad_norm": 2.4097700119018555, |
|
"learning_rate": 4.053522406135775e-05, |
|
"loss": 1.0897, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.40046929996089164, |
|
"grad_norm": 1.9280041456222534, |
|
"learning_rate": 3.777924554357096e-05, |
|
"loss": 1.2415, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.41298396558466954, |
|
"grad_norm": 1.7732486724853516, |
|
"learning_rate": 3.509700894014496e-05, |
|
"loss": 1.3308, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.4254986312084474, |
|
"grad_norm": 1.8770219087600708, |
|
"learning_rate": 3.250000000000001e-05, |
|
"loss": 1.2773, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4380132968322253, |
|
"grad_norm": 2.209496021270752, |
|
"learning_rate": 2.9999339514117912e-05, |
|
"loss": 1.1981, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.45052796245600313, |
|
"grad_norm": 2.2610745429992676, |
|
"learning_rate": 2.760573569460757e-05, |
|
"loss": 1.0344, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.46304262807978097, |
|
"grad_norm": 2.2316040992736816, |
|
"learning_rate": 2.53294383204969e-05, |
|
"loss": 1.2909, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.47555729370355887, |
|
"grad_norm": 1.6294275522232056, |
|
"learning_rate": 2.3180194846605367e-05, |
|
"loss": 0.9738, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.4880719593273367, |
|
"grad_norm": 1.528761625289917, |
|
"learning_rate": 2.1167208663446025e-05, |
|
"loss": 0.8498, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5005866249511146, |
|
"grad_norm": 1.7920118570327759, |
|
"learning_rate": 1.9299099686894423e-05, |
|
"loss": 1.5672, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5131012905748924, |
|
"grad_norm": 1.6099443435668945, |
|
"learning_rate": 1.758386744638546e-05, |
|
"loss": 1.1178, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.5256159561986703, |
|
"grad_norm": 1.7412190437316895, |
|
"learning_rate": 1.602885682970026e-05, |
|
"loss": 1.1435, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5381306218224482, |
|
"grad_norm": 1.6830037832260132, |
|
"learning_rate": 1.464072663102903e-05, |
|
"loss": 1.075, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5506452874462261, |
|
"grad_norm": 1.8176605701446533, |
|
"learning_rate": 1.3425421036992098e-05, |
|
"loss": 1.1402, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.5631599530700039, |
|
"grad_norm": 1.6718379259109497, |
|
"learning_rate": 1.2388144172720251e-05, |
|
"loss": 1.1893, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.5756746186937818, |
|
"grad_norm": 1.240828037261963, |
|
"learning_rate": 1.1533337816991932e-05, |
|
"loss": 0.6582, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.5881892843175597, |
|
"grad_norm": 1.8477025032043457, |
|
"learning_rate": 1.0864662381854632e-05, |
|
"loss": 0.9412, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6007039499413375, |
|
"grad_norm": 2.1296470165252686, |
|
"learning_rate": 1.0384981238178534e-05, |
|
"loss": 0.9925, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6132186155651154, |
|
"grad_norm": 1.826198935508728, |
|
"learning_rate": 1.0096348454262845e-05, |
|
"loss": 0.9655, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.6257332811888933, |
|
"grad_norm": 2.643702745437622, |
|
"learning_rate": 1e-05, |
|
"loss": 1.618, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6257332811888933, |
|
      "eval_loss": null,
|
"eval_runtime": 2.0353, |
|
"eval_samples_per_second": 24.567, |
|
"eval_steps_per_second": 6.387, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.589463780163584e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|