{
  "best_metric": 1.8114935159683228,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.5708169818052087,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011416339636104174,
      "grad_norm": 7.898479461669922,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 3.839,
      "step": 1
    },
    {
      "epoch": 0.011416339636104174,
      "eval_loss": 4.370181083679199,
      "eval_runtime": 4.6814,
      "eval_samples_per_second": 10.681,
      "eval_steps_per_second": 1.495,
      "step": 1
    },
    {
      "epoch": 0.022832679272208348,
      "grad_norm": 7.888498306274414,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 3.8169,
      "step": 2
    },
    {
      "epoch": 0.03424901890831252,
      "grad_norm": 8.71157455444336,
      "learning_rate": 8.999999999999999e-05,
      "loss": 3.9081,
      "step": 3
    },
    {
      "epoch": 0.045665358544416695,
      "grad_norm": 7.415948867797852,
      "learning_rate": 0.00011999999999999999,
      "loss": 3.4078,
      "step": 4
    },
    {
      "epoch": 0.05708169818052087,
      "grad_norm": 4.761682033538818,
      "learning_rate": 0.00015,
      "loss": 2.8352,
      "step": 5
    },
    {
      "epoch": 0.06849803781662504,
      "grad_norm": 4.246852397918701,
      "learning_rate": 0.00017999999999999998,
      "loss": 2.5466,
      "step": 6
    },
    {
      "epoch": 0.07991437745272922,
      "grad_norm": 3.3111937046051025,
      "learning_rate": 0.00020999999999999998,
      "loss": 2.3276,
      "step": 7
    },
    {
      "epoch": 0.09133071708883339,
      "grad_norm": 4.848330020904541,
      "learning_rate": 0.00023999999999999998,
      "loss": 2.3207,
      "step": 8
    },
    {
      "epoch": 0.10274705672493757,
      "grad_norm": 2.9185802936553955,
      "learning_rate": 0.00027,
      "loss": 2.2468,
      "step": 9
    },
    {
      "epoch": 0.11416339636104174,
      "grad_norm": 2.7042579650878906,
      "learning_rate": 0.0003,
      "loss": 1.9733,
      "step": 10
    },
    {
      "epoch": 0.12557973599714592,
      "grad_norm": 3.202441692352295,
      "learning_rate": 0.0002999794957488703,
      "loss": 2.0887,
      "step": 11
    },
    {
      "epoch": 0.1369960756332501,
      "grad_norm": 2.5777502059936523,
      "learning_rate": 0.0002999179886011389,
      "loss": 1.6886,
      "step": 12
    },
    {
      "epoch": 0.14841241526935425,
      "grad_norm": 3.485649347305298,
      "learning_rate": 0.0002998154953722457,
      "loss": 1.9007,
      "step": 13
    },
    {
      "epoch": 0.15982875490545845,
      "grad_norm": 2.80680775642395,
      "learning_rate": 0.00029967204408281613,
      "loss": 1.9419,
      "step": 14
    },
    {
      "epoch": 0.17124509454156261,
      "grad_norm": 2.477963924407959,
      "learning_rate": 0.00029948767395100045,
      "loss": 1.8113,
      "step": 15
    },
    {
      "epoch": 0.18266143417766678,
      "grad_norm": 3.5928072929382324,
      "learning_rate": 0.0002992624353817517,
      "loss": 2.0529,
      "step": 16
    },
    {
      "epoch": 0.19407777381377095,
      "grad_norm": 2.8333468437194824,
      "learning_rate": 0.0002989963899530457,
      "loss": 1.9951,
      "step": 17
    },
    {
      "epoch": 0.20549411344987514,
      "grad_norm": 2.8093178272247314,
      "learning_rate": 0.00029868961039904624,
      "loss": 1.9505,
      "step": 18
    },
    {
      "epoch": 0.2169104530859793,
      "grad_norm": 3.1787095069885254,
      "learning_rate": 0.00029834218059022024,
      "loss": 1.9297,
      "step": 19
    },
    {
      "epoch": 0.22832679272208348,
      "grad_norm": 2.9454715251922607,
      "learning_rate": 0.00029795419551040833,
      "loss": 1.9485,
      "step": 20
    },
    {
      "epoch": 0.23974313235818764,
      "grad_norm": 3.2813782691955566,
      "learning_rate": 0.00029752576123085736,
      "loss": 2.0743,
      "step": 21
    },
    {
      "epoch": 0.25115947199429184,
      "grad_norm": 2.510538101196289,
      "learning_rate": 0.0002970569948812214,
      "loss": 1.9988,
      "step": 22
    },
    {
      "epoch": 0.262575811630396,
      "grad_norm": 2.534315586090088,
      "learning_rate": 0.0002965480246175399,
      "loss": 1.9098,
      "step": 23
    },
    {
      "epoch": 0.2739921512665002,
      "grad_norm": 2.1965324878692627,
      "learning_rate": 0.0002959989895872009,
      "loss": 1.7517,
      "step": 24
    },
    {
      "epoch": 0.28540849090260434,
      "grad_norm": 2.1002790927886963,
      "learning_rate": 0.0002954100398908995,
      "loss": 1.6897,
      "step": 25
    },
    {
      "epoch": 0.28540849090260434,
      "eval_loss": 1.9441114664077759,
      "eval_runtime": 4.7777,
      "eval_samples_per_second": 10.465,
      "eval_steps_per_second": 1.465,
      "step": 25
    },
    {
      "epoch": 0.2968248305387085,
      "grad_norm": 2.207418918609619,
      "learning_rate": 0.0002947813365416023,
      "loss": 1.95,
      "step": 26
    },
    {
      "epoch": 0.3082411701748127,
      "grad_norm": 2.6818461418151855,
      "learning_rate": 0.0002941130514205272,
      "loss": 1.9706,
      "step": 27
    },
    {
      "epoch": 0.3196575098109169,
      "grad_norm": 2.279365062713623,
      "learning_rate": 0.0002934053672301536,
      "loss": 1.6524,
      "step": 28
    },
    {
      "epoch": 0.33107384944702106,
      "grad_norm": 2.2755017280578613,
      "learning_rate": 0.00029265847744427303,
      "loss": 1.7105,
      "step": 29
    },
    {
      "epoch": 0.34249018908312523,
      "grad_norm": 2.5415656566619873,
      "learning_rate": 0.00029187258625509513,
      "loss": 1.8029,
      "step": 30
    },
    {
      "epoch": 0.3539065287192294,
      "grad_norm": 2.6460940837860107,
      "learning_rate": 0.00029104790851742417,
      "loss": 1.8104,
      "step": 31
    },
    {
      "epoch": 0.36532286835533356,
      "grad_norm": 2.3558075428009033,
      "learning_rate": 0.0002901846696899191,
      "loss": 1.8417,
      "step": 32
    },
    {
      "epoch": 0.37673920799143773,
      "grad_norm": 2.1366970539093018,
      "learning_rate": 0.00028928310577345606,
      "loss": 1.7281,
      "step": 33
    },
    {
      "epoch": 0.3881555476275419,
      "grad_norm": 2.147944450378418,
      "learning_rate": 0.0002883434632466077,
      "loss": 1.7001,
      "step": 34
    },
    {
      "epoch": 0.3995718872636461,
      "grad_norm": 1.9789509773254395,
      "learning_rate": 0.00028736599899825856,
      "loss": 1.7427,
      "step": 35
    },
    {
      "epoch": 0.4109882268997503,
      "grad_norm": 2.110389471054077,
      "learning_rate": 0.00028635098025737434,
      "loss": 1.8732,
      "step": 36
    },
    {
      "epoch": 0.42240456653585445,
      "grad_norm": 2.150489568710327,
      "learning_rate": 0.00028529868451994384,
      "loss": 1.6447,
      "step": 37
    },
    {
      "epoch": 0.4338209061719586,
      "grad_norm": 2.31097674369812,
      "learning_rate": 0.0002842093994731145,
      "loss": 1.8932,
      "step": 38
    },
    {
      "epoch": 0.4452372458080628,
      "grad_norm": 2.4540700912475586,
      "learning_rate": 0.00028308342291654174,
      "loss": 1.8748,
      "step": 39
    },
    {
      "epoch": 0.45665358544416695,
      "grad_norm": 2.4295310974121094,
      "learning_rate": 0.00028192106268097334,
      "loss": 1.8415,
      "step": 40
    },
    {
      "epoch": 0.4680699250802711,
      "grad_norm": 3.067713737487793,
      "learning_rate": 0.00028072263654409154,
      "loss": 1.9419,
      "step": 41
    },
    {
      "epoch": 0.4794862647163753,
      "grad_norm": 2.9190239906311035,
      "learning_rate": 0.0002794884721436361,
      "loss": 1.9494,
      "step": 42
    },
    {
      "epoch": 0.4909026043524795,
      "grad_norm": 2.983987808227539,
      "learning_rate": 0.00027821890688783083,
      "loss": 1.8778,
      "step": 43
    },
    {
      "epoch": 0.5023189439885837,
      "grad_norm": 3.0087473392486572,
      "learning_rate": 0.0002769142878631403,
      "loss": 1.8793,
      "step": 44
    },
    {
      "epoch": 0.5137352836246878,
      "grad_norm": 1.934885859489441,
      "learning_rate": 0.00027557497173937923,
      "loss": 1.6278,
      "step": 45
    },
    {
      "epoch": 0.525151623260792,
      "grad_norm": 1.9108154773712158,
      "learning_rate": 0.000274201324672203,
      "loss": 1.8105,
      "step": 46
    },
    {
      "epoch": 0.5365679628968962,
      "grad_norm": 2.1089630126953125,
      "learning_rate": 0.00027279372220300385,
      "loss": 1.675,
      "step": 47
    },
    {
      "epoch": 0.5479843025330003,
      "grad_norm": 2.15185546875,
      "learning_rate": 0.0002713525491562421,
      "loss": 1.8751,
      "step": 48
    },
    {
      "epoch": 0.5594006421691046,
      "grad_norm": 2.311039686203003,
      "learning_rate": 0.00026987819953423867,
      "loss": 1.8339,
      "step": 49
    },
    {
      "epoch": 0.5708169818052087,
      "grad_norm": 2.1658663749694824,
      "learning_rate": 0.00026837107640945905,
      "loss": 1.6501,
      "step": 50
    },
    {
      "epoch": 0.5708169818052087,
      "eval_loss": 1.8114935159683228,
      "eval_runtime": 4.7797,
      "eval_samples_per_second": 10.461,
      "eval_steps_per_second": 1.465,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.14825259139072e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}