|
{ |
|
"best_metric": 0.006335424259305, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.3401360544217687, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006802721088435374, |
|
"grad_norm": 8.384090423583984, |
|
"learning_rate": 5e-05, |
|
"loss": 6.1569, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006802721088435374, |
|
"eval_loss": 7.6087327003479, |
|
"eval_runtime": 19.4438, |
|
"eval_samples_per_second": 50.916, |
|
"eval_steps_per_second": 6.377, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.013605442176870748, |
|
"grad_norm": 9.042967796325684, |
|
"learning_rate": 0.0001, |
|
"loss": 6.3495, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02040816326530612, |
|
"grad_norm": 7.906692028045654, |
|
"learning_rate": 9.989294616193017e-05, |
|
"loss": 6.1599, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.027210884353741496, |
|
"grad_norm": 7.033726215362549, |
|
"learning_rate": 9.957224306869053e-05, |
|
"loss": 5.2656, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.034013605442176874, |
|
"grad_norm": 7.941586494445801, |
|
"learning_rate": 9.903926402016153e-05, |
|
"loss": 4.3318, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 8.245903968811035, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 3.0846, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.047619047619047616, |
|
"grad_norm": 8.305931091308594, |
|
"learning_rate": 9.73465064747553e-05, |
|
"loss": 2.3041, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05442176870748299, |
|
"grad_norm": 6.832699775695801, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.5622, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.061224489795918366, |
|
"grad_norm": 4.221113204956055, |
|
"learning_rate": 9.484363707663442e-05, |
|
"loss": 1.1772, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 2.9281857013702393, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 0.7651, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07482993197278912, |
|
"grad_norm": 3.394575357437134, |
|
"learning_rate": 9.157348061512727e-05, |
|
"loss": 0.9967, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 5.905345439910889, |
|
"learning_rate": 8.966766701456177e-05, |
|
"loss": 1.1588, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08843537414965986, |
|
"grad_norm": 4.155373573303223, |
|
"learning_rate": 8.759199037394887e-05, |
|
"loss": 0.7795, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09523809523809523, |
|
"grad_norm": 2.751556634902954, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 0.1565, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10204081632653061, |
|
"grad_norm": 1.0200952291488647, |
|
"learning_rate": 8.296729075500344e-05, |
|
"loss": 0.1343, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10884353741496598, |
|
"grad_norm": 0.6103468537330627, |
|
"learning_rate": 8.043807145043604e-05, |
|
"loss": 0.0584, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11564625850340136, |
|
"grad_norm": 0.23342931270599365, |
|
"learning_rate": 7.777851165098012e-05, |
|
"loss": 0.0042, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 1.3207299709320068, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.017, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1292517006802721, |
|
"grad_norm": 0.262893944978714, |
|
"learning_rate": 7.211443451095007e-05, |
|
"loss": 0.0029, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 0.7945185303688049, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 0.0357, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 0.5545347929000854, |
|
"learning_rate": 6.607197326515808e-05, |
|
"loss": 0.0314, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.14965986394557823, |
|
"grad_norm": 0.06888148933649063, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 0.001, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1564625850340136, |
|
"grad_norm": 0.6578580737113953, |
|
"learning_rate": 5.9754516100806423e-05, |
|
"loss": 0.0213, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 0.9135988354682922, |
|
"learning_rate": 5.6526309611002594e-05, |
|
"loss": 0.0296, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"grad_norm": 1.6072754859924316, |
|
"learning_rate": 5.327015646150716e-05, |
|
"loss": 0.0584, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"eval_loss": 0.03923076018691063, |
|
"eval_runtime": 17.9785, |
|
"eval_samples_per_second": 55.066, |
|
"eval_steps_per_second": 6.897, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17687074829931973, |
|
"grad_norm": 0.8757835030555725, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0741, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1836734693877551, |
|
"grad_norm": 0.6202448010444641, |
|
"learning_rate": 4.6729843538492847e-05, |
|
"loss": 0.0278, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.19047619047619047, |
|
"grad_norm": 0.2728913724422455, |
|
"learning_rate": 4.347369038899744e-05, |
|
"loss": 0.0152, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19727891156462585, |
|
"grad_norm": 0.35162627696990967, |
|
"learning_rate": 4.0245483899193595e-05, |
|
"loss": 0.0054, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 0.26197877526283264, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 0.0038, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2108843537414966, |
|
"grad_norm": 0.02254539169371128, |
|
"learning_rate": 3.392802673484193e-05, |
|
"loss": 0.0002, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.21768707482993196, |
|
"grad_norm": 0.5768834948539734, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 0.0592, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.22448979591836735, |
|
"grad_norm": 0.03499084711074829, |
|
"learning_rate": 2.7885565489049946e-05, |
|
"loss": 0.0004, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.23129251700680273, |
|
"grad_norm": 0.11083720624446869, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.001, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 0.09705158323049545, |
|
"learning_rate": 2.2221488349019903e-05, |
|
"loss": 0.0009, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 0.25740352272987366, |
|
"learning_rate": 1.9561928549563968e-05, |
|
"loss": 0.0076, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.25170068027210885, |
|
"grad_norm": 0.5679620504379272, |
|
"learning_rate": 1.703270924499656e-05, |
|
"loss": 0.0165, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2585034013605442, |
|
"grad_norm": 0.4646860659122467, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 0.0172, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2653061224489796, |
|
"grad_norm": 0.19410698115825653, |
|
"learning_rate": 1.2408009626051137e-05, |
|
"loss": 0.004, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 0.2193833589553833, |
|
"learning_rate": 1.0332332985438248e-05, |
|
"loss": 0.0106, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2789115646258503, |
|
"grad_norm": 0.6219828724861145, |
|
"learning_rate": 8.426519384872733e-06, |
|
"loss": 0.014, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.018709324300289154, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 0.0003, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2925170068027211, |
|
"grad_norm": 0.037566523998975754, |
|
"learning_rate": 5.156362923365588e-06, |
|
"loss": 0.0003, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.29931972789115646, |
|
"grad_norm": 0.37556684017181396, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 0.0064, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.30612244897959184, |
|
"grad_norm": 0.027847126126289368, |
|
"learning_rate": 2.653493525244721e-06, |
|
"loss": 0.0002, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.3129251700680272, |
|
"grad_norm": 0.015570895746350288, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 0.0003, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3197278911564626, |
|
"grad_norm": 0.3432367146015167, |
|
"learning_rate": 9.607359798384785e-07, |
|
"loss": 0.005, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 0.11973174661397934, |
|
"learning_rate": 4.277569313094809e-07, |
|
"loss": 0.0015, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.04032721742987633, |
|
"learning_rate": 1.0705383806982606e-07, |
|
"loss": 0.0005, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 0.1603790819644928, |
|
"learning_rate": 0.0, |
|
"loss": 0.0019, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"eval_loss": 0.006335424259305, |
|
"eval_runtime": 18.3293, |
|
"eval_samples_per_second": 54.012, |
|
"eval_steps_per_second": 6.765, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.234825437211525e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|