{ "best_metric": 1.8114935159683228, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.5708169818052087, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011416339636104174, "grad_norm": 7.898479461669922, "learning_rate": 2.9999999999999997e-05, "loss": 3.839, "step": 1 }, { "epoch": 0.011416339636104174, "eval_loss": 4.370181083679199, "eval_runtime": 4.6814, "eval_samples_per_second": 10.681, "eval_steps_per_second": 1.495, "step": 1 }, { "epoch": 0.022832679272208348, "grad_norm": 7.888498306274414, "learning_rate": 5.9999999999999995e-05, "loss": 3.8169, "step": 2 }, { "epoch": 0.03424901890831252, "grad_norm": 8.71157455444336, "learning_rate": 8.999999999999999e-05, "loss": 3.9081, "step": 3 }, { "epoch": 0.045665358544416695, "grad_norm": 7.415948867797852, "learning_rate": 0.00011999999999999999, "loss": 3.4078, "step": 4 }, { "epoch": 0.05708169818052087, "grad_norm": 4.761682033538818, "learning_rate": 0.00015, "loss": 2.8352, "step": 5 }, { "epoch": 0.06849803781662504, "grad_norm": 4.246852397918701, "learning_rate": 0.00017999999999999998, "loss": 2.5466, "step": 6 }, { "epoch": 0.07991437745272922, "grad_norm": 3.3111937046051025, "learning_rate": 0.00020999999999999998, "loss": 2.3276, "step": 7 }, { "epoch": 0.09133071708883339, "grad_norm": 4.848330020904541, "learning_rate": 0.00023999999999999998, "loss": 2.3207, "step": 8 }, { "epoch": 0.10274705672493757, "grad_norm": 2.9185802936553955, "learning_rate": 0.00027, "loss": 2.2468, "step": 9 }, { "epoch": 0.11416339636104174, "grad_norm": 2.7042579650878906, "learning_rate": 0.0003, "loss": 1.9733, "step": 10 }, { "epoch": 0.12557973599714592, "grad_norm": 3.202441692352295, "learning_rate": 0.0002999794957488703, "loss": 2.0887, "step": 11 }, { "epoch": 0.1369960756332501, "grad_norm": 2.5777502059936523, "learning_rate": 0.0002999179886011389, "loss": 1.6886, "step": 12 }, { "epoch": 0.14841241526935425, "grad_norm": 3.485649347305298, "learning_rate": 0.0002998154953722457, "loss": 1.9007, "step": 13 }, { "epoch": 0.15982875490545845, "grad_norm": 2.80680775642395, "learning_rate": 0.00029967204408281613, "loss": 1.9419, "step": 14 }, { "epoch": 0.17124509454156261, "grad_norm": 2.477963924407959, "learning_rate": 0.00029948767395100045, "loss": 1.8113, "step": 15 }, { "epoch": 0.18266143417766678, "grad_norm": 3.5928072929382324, "learning_rate": 0.0002992624353817517, "loss": 2.0529, "step": 16 }, { "epoch": 0.19407777381377095, "grad_norm": 2.8333468437194824, "learning_rate": 0.0002989963899530457, "loss": 1.9951, "step": 17 }, { "epoch": 0.20549411344987514, "grad_norm": 2.8093178272247314, "learning_rate": 0.00029868961039904624, "loss": 1.9505, "step": 18 }, { "epoch": 0.2169104530859793, "grad_norm": 3.1787095069885254, "learning_rate": 0.00029834218059022024, "loss": 1.9297, "step": 19 }, { "epoch": 0.22832679272208348, "grad_norm": 2.9454715251922607, "learning_rate": 0.00029795419551040833, "loss": 1.9485, "step": 20 }, { "epoch": 0.23974313235818764, "grad_norm": 3.2813782691955566, "learning_rate": 0.00029752576123085736, "loss": 2.0743, "step": 21 }, { "epoch": 0.25115947199429184, "grad_norm": 2.510538101196289, "learning_rate": 0.0002970569948812214, "loss": 1.9988, "step": 22 }, { "epoch": 0.262575811630396, "grad_norm": 2.534315586090088, "learning_rate": 0.0002965480246175399, "loss": 1.9098, "step": 23 }, { "epoch": 0.2739921512665002, "grad_norm": 2.1965324878692627, "learning_rate": 0.0002959989895872009, "loss": 1.7517, "step": 24 }, { "epoch": 0.28540849090260434, "grad_norm": 2.1002790927886963, "learning_rate": 0.0002954100398908995, "loss": 1.6897, "step": 25 }, { "epoch": 0.28540849090260434, "eval_loss": 1.9441114664077759, "eval_runtime": 4.7777, "eval_samples_per_second": 10.465, "eval_steps_per_second": 1.465, "step": 25 }, { "epoch": 0.2968248305387085, "grad_norm": 2.207418918609619, "learning_rate": 0.0002947813365416023, "loss": 1.95, "step": 26 }, { "epoch": 0.3082411701748127, "grad_norm": 2.6818461418151855, "learning_rate": 0.0002941130514205272, "loss": 1.9706, "step": 27 }, { "epoch": 0.3196575098109169, "grad_norm": 2.279365062713623, "learning_rate": 0.0002934053672301536, "loss": 1.6524, "step": 28 }, { "epoch": 0.33107384944702106, "grad_norm": 2.2755017280578613, "learning_rate": 0.00029265847744427303, "loss": 1.7105, "step": 29 }, { "epoch": 0.34249018908312523, "grad_norm": 2.5415656566619873, "learning_rate": 0.00029187258625509513, "loss": 1.8029, "step": 30 }, { "epoch": 0.3539065287192294, "grad_norm": 2.6460940837860107, "learning_rate": 0.00029104790851742417, "loss": 1.8104, "step": 31 }, { "epoch": 0.36532286835533356, "grad_norm": 2.3558075428009033, "learning_rate": 0.0002901846696899191, "loss": 1.8417, "step": 32 }, { "epoch": 0.37673920799143773, "grad_norm": 2.1366970539093018, "learning_rate": 0.00028928310577345606, "loss": 1.7281, "step": 33 }, { "epoch": 0.3881555476275419, "grad_norm": 2.147944450378418, "learning_rate": 0.0002883434632466077, "loss": 1.7001, "step": 34 }, { "epoch": 0.3995718872636461, "grad_norm": 1.9789509773254395, "learning_rate": 0.00028736599899825856, "loss": 1.7427, "step": 35 }, { "epoch": 0.4109882268997503, "grad_norm": 2.110389471054077, "learning_rate": 0.00028635098025737434, "loss": 1.8732, "step": 36 }, { "epoch": 0.42240456653585445, "grad_norm": 2.150489568710327, "learning_rate": 0.00028529868451994384, "loss": 1.6447, "step": 37 }, { "epoch": 0.4338209061719586, "grad_norm": 2.31097674369812, "learning_rate": 0.0002842093994731145, "loss": 1.8932, "step": 38 }, { "epoch": 0.4452372458080628, "grad_norm": 2.4540700912475586, "learning_rate": 0.00028308342291654174, "loss": 1.8748, "step": 39 }, { "epoch": 0.45665358544416695, "grad_norm": 2.4295310974121094, "learning_rate": 0.00028192106268097334, "loss": 1.8415, "step": 40 }, { "epoch": 0.4680699250802711, "grad_norm": 3.067713737487793, "learning_rate": 0.00028072263654409154, "loss": 1.9419, "step": 41 }, { "epoch": 0.4794862647163753, "grad_norm": 2.9190239906311035, "learning_rate": 0.0002794884721436361, "loss": 1.9494, "step": 42 }, { "epoch": 0.4909026043524795, "grad_norm": 2.983987808227539, "learning_rate": 0.00027821890688783083, "loss": 1.8778, "step": 43 }, { "epoch": 0.5023189439885837, "grad_norm": 3.0087473392486572, "learning_rate": 0.0002769142878631403, "loss": 1.8793, "step": 44 }, { "epoch": 0.5137352836246878, "grad_norm": 1.934885859489441, "learning_rate": 0.00027557497173937923, "loss": 1.6278, "step": 45 }, { "epoch": 0.525151623260792, "grad_norm": 1.9108154773712158, "learning_rate": 0.000274201324672203, "loss": 1.8105, "step": 46 }, { "epoch": 0.5365679628968962, "grad_norm": 2.1089630126953125, "learning_rate": 0.00027279372220300385, "loss": 1.675, "step": 47 }, { "epoch": 0.5479843025330003, "grad_norm": 2.15185546875, "learning_rate": 0.0002713525491562421, "loss": 1.8751, "step": 48 }, { "epoch": 0.5594006421691046, "grad_norm": 2.311039686203003, "learning_rate": 0.00026987819953423867, "loss": 1.8339, "step": 49 }, { "epoch": 0.5708169818052087, "grad_norm": 2.1658663749694824, "learning_rate": 0.00026837107640945905, "loss": 1.6501, "step": 50 }, { "epoch": 0.5708169818052087, "eval_loss": 1.8114935159683228, "eval_runtime": 4.7797, "eval_samples_per_second": 10.461, "eval_steps_per_second": 1.465, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.14825259139072e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }