|
{ |
|
"best_metric": 1.0260274410247803, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 1.0255427841634739, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.020434227330779056, |
|
"grad_norm": 7.360929489135742, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 17.3749, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.020434227330779056, |
|
"eval_loss": 1.138229489326477, |
|
"eval_runtime": 21.2669, |
|
"eval_samples_per_second": 7.759, |
|
"eval_steps_per_second": 1.975, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04086845466155811, |
|
"grad_norm": 6.317312717437744, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 17.9715, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06130268199233716, |
|
"grad_norm": 6.0731658935546875, |
|
"learning_rate": 1.5e-06, |
|
"loss": 16.5836, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.08173690932311622, |
|
"grad_norm": 5.645073413848877, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 16.209, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.10217113665389528, |
|
"grad_norm": 7.367918014526367, |
|
"learning_rate": 2.5e-06, |
|
"loss": 17.2826, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.12260536398467432, |
|
"grad_norm": 6.8022966384887695, |
|
"learning_rate": 3e-06, |
|
"loss": 16.758, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.14303959131545338, |
|
"grad_norm": 6.725194931030273, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"loss": 19.0619, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.16347381864623245, |
|
"grad_norm": 6.638408660888672, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 17.6671, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.1839080459770115, |
|
"grad_norm": 5.699502468109131, |
|
"learning_rate": 4.5e-06, |
|
"loss": 17.215, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.20434227330779056, |
|
"grad_norm": 6.1157097816467285, |
|
"learning_rate": 5e-06, |
|
"loss": 18.4525, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2247765006385696, |
|
"grad_norm": 5.536643981933594, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 15.6788, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.24521072796934865, |
|
"grad_norm": 7.206900596618652, |
|
"learning_rate": 6e-06, |
|
"loss": 17.0678, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2656449553001277, |
|
"grad_norm": 5.241375923156738, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 18.2783, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.28607918263090676, |
|
"grad_norm": 5.224916934967041, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 18.9804, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.3065134099616858, |
|
"grad_norm": 4.611645698547363, |
|
"learning_rate": 7.5e-06, |
|
"loss": 18.8118, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3269476372924649, |
|
"grad_norm": 5.321063995361328, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 16.1292, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.34738186462324394, |
|
"grad_norm": 4.779561519622803, |
|
"learning_rate": 8.500000000000002e-06, |
|
"loss": 17.0707, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.367816091954023, |
|
"grad_norm": 4.141415119171143, |
|
"learning_rate": 9e-06, |
|
"loss": 19.2067, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.388250319284802, |
|
"grad_norm": 4.518102645874023, |
|
"learning_rate": 9.5e-06, |
|
"loss": 16.8985, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.4086845466155811, |
|
"grad_norm": 4.083268165588379, |
|
"learning_rate": 1e-05, |
|
"loss": 18.5685, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.42911877394636017, |
|
"grad_norm": 4.17296028137207, |
|
"learning_rate": 1.05e-05, |
|
"loss": 16.5411, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.4495530012771392, |
|
"grad_norm": 3.7888073921203613, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 16.498, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.46998722860791825, |
|
"grad_norm": 3.7531864643096924, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"loss": 15.8281, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.4904214559386973, |
|
"grad_norm": 3.5403201580047607, |
|
"learning_rate": 1.2e-05, |
|
"loss": 16.5056, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5108556832694764, |
|
"grad_norm": 3.830643653869629, |
|
"learning_rate": 1.25e-05, |
|
"loss": 16.6906, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5108556832694764, |
|
"eval_loss": 1.084327220916748, |
|
"eval_runtime": 21.5708, |
|
"eval_samples_per_second": 7.649, |
|
"eval_steps_per_second": 1.947, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5312899106002554, |
|
"grad_norm": 3.512051582336426, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 17.1562, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"grad_norm": 3.581163167953491, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 15.3294, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.5721583652618135, |
|
"grad_norm": 3.7532386779785156, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 18.2124, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 3.3712620735168457, |
|
"learning_rate": 1.45e-05, |
|
"loss": 15.1985, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6130268199233716, |
|
"grad_norm": 3.74042010307312, |
|
"learning_rate": 1.5e-05, |
|
"loss": 16.3185, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6334610472541508, |
|
"grad_norm": 3.747729778289795, |
|
"learning_rate": 1.55e-05, |
|
"loss": 18.091, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.6538952745849298, |
|
"grad_norm": 3.3717548847198486, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 15.5985, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.6743295019157088, |
|
"grad_norm": 3.1136016845703125, |
|
"learning_rate": 1.65e-05, |
|
"loss": 16.1861, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.6947637292464879, |
|
"grad_norm": 2.9378445148468018, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 15.428, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.7151979565772669, |
|
"grad_norm": 3.0630242824554443, |
|
"learning_rate": 1.75e-05, |
|
"loss": 15.6852, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.735632183908046, |
|
"grad_norm": 3.5427470207214355, |
|
"learning_rate": 1.8e-05, |
|
"loss": 15.8646, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.756066411238825, |
|
"grad_norm": 3.6774771213531494, |
|
"learning_rate": 1.85e-05, |
|
"loss": 15.4281, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.776500638569604, |
|
"grad_norm": 3.539029836654663, |
|
"learning_rate": 1.9e-05, |
|
"loss": 16.3628, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.7969348659003831, |
|
"grad_norm": 3.6181910037994385, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"loss": 16.0907, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.8173690932311622, |
|
"grad_norm": 3.2122557163238525, |
|
"learning_rate": 2e-05, |
|
"loss": 16.8239, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8378033205619413, |
|
"grad_norm": 3.049232006072998, |
|
"learning_rate": 2.05e-05, |
|
"loss": 17.2511, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.8582375478927203, |
|
"grad_norm": 3.254066228866577, |
|
"learning_rate": 2.1e-05, |
|
"loss": 16.2736, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.8786717752234994, |
|
"grad_norm": 3.0137479305267334, |
|
"learning_rate": 2.15e-05, |
|
"loss": 16.5703, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.8991060025542784, |
|
"grad_norm": 3.380715847015381, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 16.8219, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.9195402298850575, |
|
"grad_norm": 2.935159921646118, |
|
"learning_rate": 2.25e-05, |
|
"loss": 16.5484, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.9399744572158365, |
|
"grad_norm": 2.9660491943359375, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 15.6773, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.9604086845466155, |
|
"grad_norm": 2.7908406257629395, |
|
"learning_rate": 2.35e-05, |
|
"loss": 15.7697, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.9808429118773946, |
|
"grad_norm": 3.0066637992858887, |
|
"learning_rate": 2.4e-05, |
|
"loss": 17.0586, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0051085568326947, |
|
"grad_norm": 3.090890645980835, |
|
"learning_rate": 2.45e-05, |
|
"loss": 19.5984, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.0255427841634739, |
|
"grad_norm": 2.8104114532470703, |
|
"learning_rate": 2.5e-05, |
|
"loss": 16.0491, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0255427841634739, |
|
"eval_loss": 1.0260274410247803, |
|
"eval_runtime": 21.5925, |
|
"eval_samples_per_second": 7.642, |
|
"eval_steps_per_second": 1.945, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.6709702668294554e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|