{
  "best_metric": 1.0674021244049072,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.013531341970839958,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00027062683941679914,
      "grad_norm": 0.5000702738761902,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.8662,
      "step": 1
    },
    {
      "epoch": 0.00027062683941679914,
      "eval_loss": 1.2486605644226074,
      "eval_runtime": 576.5633,
      "eval_samples_per_second": 10.795,
      "eval_steps_per_second": 2.699,
      "step": 1
    },
    {
      "epoch": 0.0005412536788335983,
      "grad_norm": 0.7091050744056702,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.8844,
      "step": 2
    },
    {
      "epoch": 0.0008118805182503975,
      "grad_norm": 0.7198283076286316,
      "learning_rate": 1.5e-06,
      "loss": 0.8736,
      "step": 3
    },
    {
      "epoch": 0.0010825073576671966,
      "grad_norm": 0.7402541041374207,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.9226,
      "step": 4
    },
    {
      "epoch": 0.0013531341970839958,
      "grad_norm": 0.6890178918838501,
      "learning_rate": 2.5e-06,
      "loss": 0.9618,
      "step": 5
    },
    {
      "epoch": 0.001623761036500795,
      "grad_norm": 0.7613320350646973,
      "learning_rate": 3e-06,
      "loss": 0.9509,
      "step": 6
    },
    {
      "epoch": 0.001894387875917594,
      "grad_norm": 0.8056195378303528,
      "learning_rate": 3.5e-06,
      "loss": 0.9464,
      "step": 7
    },
    {
      "epoch": 0.002165014715334393,
      "grad_norm": 0.8153071403503418,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.9766,
      "step": 8
    },
    {
      "epoch": 0.0024356415547511926,
      "grad_norm": 0.8731986284255981,
      "learning_rate": 4.5e-06,
      "loss": 0.9854,
      "step": 9
    },
    {
      "epoch": 0.0027062683941679916,
      "grad_norm": 0.8255438804626465,
      "learning_rate": 5e-06,
      "loss": 1.0802,
      "step": 10
    },
    {
      "epoch": 0.0029768952335847906,
      "grad_norm": 0.8778969645500183,
      "learning_rate": 4.99847706754774e-06,
      "loss": 0.9281,
      "step": 11
    },
    {
      "epoch": 0.00324752207300159,
      "grad_norm": 0.7399764657020569,
      "learning_rate": 4.993910125649561e-06,
      "loss": 1.0072,
      "step": 12
    },
    {
      "epoch": 0.003518148912418389,
      "grad_norm": 0.9628943800926208,
      "learning_rate": 4.986304738420684e-06,
      "loss": 0.9862,
      "step": 13
    },
    {
      "epoch": 0.003788775751835188,
      "grad_norm": 0.8942387104034424,
      "learning_rate": 4.975670171853926e-06,
      "loss": 1.0352,
      "step": 14
    },
    {
      "epoch": 0.004059402591251988,
      "grad_norm": 0.9490991234779358,
      "learning_rate": 4.962019382530521e-06,
      "loss": 1.0584,
      "step": 15
    },
    {
      "epoch": 0.004330029430668786,
      "grad_norm": 0.9122024178504944,
      "learning_rate": 4.9453690018345144e-06,
      "loss": 1.0072,
      "step": 16
    },
    {
      "epoch": 0.004600656270085586,
      "grad_norm": 0.7996208667755127,
      "learning_rate": 4.925739315689991e-06,
      "loss": 1.1048,
      "step": 17
    },
    {
      "epoch": 0.004871283109502385,
      "grad_norm": 0.9378483891487122,
      "learning_rate": 4.903154239845798e-06,
      "loss": 1.0365,
      "step": 18
    },
    {
      "epoch": 0.005141909948919184,
      "grad_norm": 0.8539978861808777,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 0.9979,
      "step": 19
    },
    {
      "epoch": 0.005412536788335983,
      "grad_norm": 0.9930011630058289,
      "learning_rate": 4.849231551964771e-06,
      "loss": 1.0151,
      "step": 20
    },
    {
      "epoch": 0.005683163627752783,
      "grad_norm": 0.9113985300064087,
      "learning_rate": 4.817959636416969e-06,
      "loss": 1.0266,
      "step": 21
    },
    {
      "epoch": 0.005953790467169581,
      "grad_norm": 0.9310271739959717,
      "learning_rate": 4.783863644106502e-06,
      "loss": 0.998,
      "step": 22
    },
    {
      "epoch": 0.006224417306586381,
      "grad_norm": 1.0234174728393555,
      "learning_rate": 4.746985115747918e-06,
      "loss": 1.0837,
      "step": 23
    },
    {
      "epoch": 0.00649504414600318,
      "grad_norm": 0.890159010887146,
      "learning_rate": 4.707368982147318e-06,
      "loss": 1.0044,
      "step": 24
    },
    {
      "epoch": 0.006765670985419979,
      "grad_norm": 1.1107178926467896,
      "learning_rate": 4.665063509461098e-06,
      "loss": 1.1168,
      "step": 25
    },
    {
      "epoch": 0.007036297824836778,
      "grad_norm": 1.1049308776855469,
      "learning_rate": 4.620120240391065e-06,
      "loss": 1.1942,
      "step": 26
    },
    {
      "epoch": 0.007306924664253578,
      "grad_norm": 1.1453604698181152,
      "learning_rate": 4.572593931387604e-06,
      "loss": 1.0531,
      "step": 27
    },
    {
      "epoch": 0.007577551503670376,
      "grad_norm": 1.0684839487075806,
      "learning_rate": 4.522542485937369e-06,
      "loss": 1.0973,
      "step": 28
    },
    {
      "epoch": 0.007848178343087176,
      "grad_norm": 1.1629831790924072,
      "learning_rate": 4.470026884016805e-06,
      "loss": 1.0673,
      "step": 29
    },
    {
      "epoch": 0.008118805182503975,
      "grad_norm": 1.0914651155471802,
      "learning_rate": 4.415111107797445e-06,
      "loss": 1.1575,
      "step": 30
    },
    {
      "epoch": 0.008389432021920775,
      "grad_norm": 1.3187044858932495,
      "learning_rate": 4.357862063693486e-06,
      "loss": 1.223,
      "step": 31
    },
    {
      "epoch": 0.008660058861337572,
      "grad_norm": 1.2369117736816406,
      "learning_rate": 4.2983495008466285e-06,
      "loss": 1.1662,
      "step": 32
    },
    {
      "epoch": 0.008930685700754372,
      "grad_norm": 1.3803397417068481,
      "learning_rate": 4.236645926147493e-06,
      "loss": 1.1548,
      "step": 33
    },
    {
      "epoch": 0.009201312540171171,
      "grad_norm": 1.3240615129470825,
      "learning_rate": 4.172826515897146e-06,
      "loss": 1.1982,
      "step": 34
    },
    {
      "epoch": 0.00947193937958797,
      "grad_norm": 1.5324469804763794,
      "learning_rate": 4.106969024216348e-06,
      "loss": 1.1681,
      "step": 35
    },
    {
      "epoch": 0.00974256621900477,
      "grad_norm": 1.323723554611206,
      "learning_rate": 4.039153688314146e-06,
      "loss": 1.1543,
      "step": 36
    },
    {
      "epoch": 0.01001319305842157,
      "grad_norm": 1.5759074687957764,
      "learning_rate": 3.969463130731183e-06,
      "loss": 1.1227,
      "step": 37
    },
    {
      "epoch": 0.010283819897838367,
      "grad_norm": 1.6370131969451904,
      "learning_rate": 3.897982258676867e-06,
      "loss": 1.1549,
      "step": 38
    },
    {
      "epoch": 0.010554446737255167,
      "grad_norm": 1.5213303565979004,
      "learning_rate": 3.824798160583012e-06,
      "loss": 1.1308,
      "step": 39
    },
    {
      "epoch": 0.010825073576671966,
      "grad_norm": 1.753510594367981,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.2172,
      "step": 40
    },
    {
      "epoch": 0.011095700416088766,
      "grad_norm": 1.6697436571121216,
      "learning_rate": 3.6736789069647273e-06,
      "loss": 1.1446,
      "step": 41
    },
    {
      "epoch": 0.011366327255505565,
      "grad_norm": 1.6956145763397217,
      "learning_rate": 3.595927866972694e-06,
      "loss": 1.0812,
      "step": 42
    },
    {
      "epoch": 0.011636954094922363,
      "grad_norm": 1.6840723752975464,
      "learning_rate": 3.516841607689501e-06,
      "loss": 1.1333,
      "step": 43
    },
    {
      "epoch": 0.011907580934339163,
      "grad_norm": 1.8532711267471313,
      "learning_rate": 3.436516483539781e-06,
      "loss": 1.2552,
      "step": 44
    },
    {
      "epoch": 0.012178207773755962,
      "grad_norm": 2.4561421871185303,
      "learning_rate": 3.3550503583141726e-06,
      "loss": 1.3493,
      "step": 45
    },
    {
      "epoch": 0.012448834613172761,
      "grad_norm": 2.4903533458709717,
      "learning_rate": 3.272542485937369e-06,
      "loss": 1.2094,
      "step": 46
    },
    {
      "epoch": 0.012719461452589561,
      "grad_norm": 3.978550910949707,
      "learning_rate": 3.189093389542498e-06,
      "loss": 1.508,
      "step": 47
    },
    {
      "epoch": 0.01299008829200636,
      "grad_norm": 6.7383856773376465,
      "learning_rate": 3.1048047389991693e-06,
      "loss": 1.5481,
      "step": 48
    },
    {
      "epoch": 0.013260715131423158,
      "grad_norm": 7.621368885040283,
      "learning_rate": 3.019779227044398e-06,
      "loss": 1.7839,
      "step": 49
    },
    {
      "epoch": 0.013531341970839958,
      "grad_norm": 7.821316242218018,
      "learning_rate": 2.9341204441673267e-06,
      "loss": 1.8119,
      "step": 50
    },
    {
      "epoch": 0.013531341970839958,
      "eval_loss": 1.0674021244049072,
      "eval_runtime": 581.3896,
      "eval_samples_per_second": 10.705,
      "eval_steps_per_second": 2.676,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.688115974465126e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}