{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0434593654932638,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000434593654932638,
      "grad_norm": 1.3132737874984741,
      "learning_rate": 1e-05,
      "loss": 10.4006,
      "step": 1
    },
    {
      "epoch": 0.000434593654932638,
      "eval_loss": 10.412358283996582,
      "eval_runtime": 4.3982,
      "eval_samples_per_second": 1762.306,
      "eval_steps_per_second": 55.25,
      "step": 1
    },
    {
      "epoch": 0.000869187309865276,
      "grad_norm": 1.264045238494873,
      "learning_rate": 2e-05,
      "loss": 10.4156,
      "step": 2
    },
    {
      "epoch": 0.001303780964797914,
      "grad_norm": 1.3569703102111816,
      "learning_rate": 3e-05,
      "loss": 10.4107,
      "step": 3
    },
    {
      "epoch": 0.001738374619730552,
      "grad_norm": 1.262238621711731,
      "learning_rate": 4e-05,
      "loss": 10.4171,
      "step": 4
    },
    {
      "epoch": 0.0021729682746631897,
      "grad_norm": 1.3592532873153687,
      "learning_rate": 5e-05,
      "loss": 10.4103,
      "step": 5
    },
    {
      "epoch": 0.002607561929595828,
      "grad_norm": 1.3463159799575806,
      "learning_rate": 6e-05,
      "loss": 10.4094,
      "step": 6
    },
    {
      "epoch": 0.003042155584528466,
      "grad_norm": 1.3761146068572998,
      "learning_rate": 7e-05,
      "loss": 10.4068,
      "step": 7
    },
    {
      "epoch": 0.003476749239461104,
      "grad_norm": 1.395202398300171,
      "learning_rate": 8e-05,
      "loss": 10.4009,
      "step": 8
    },
    {
      "epoch": 0.003911342894393742,
      "grad_norm": 1.3503137826919556,
      "learning_rate": 9e-05,
      "loss": 10.4024,
      "step": 9
    },
    {
      "epoch": 0.003911342894393742,
      "eval_loss": 10.399940490722656,
      "eval_runtime": 4.3771,
      "eval_samples_per_second": 1770.819,
      "eval_steps_per_second": 55.517,
      "step": 9
    },
    {
      "epoch": 0.004345936549326379,
      "grad_norm": 1.2571992874145508,
      "learning_rate": 0.0001,
      "loss": 10.4072,
      "step": 10
    },
    {
      "epoch": 0.0047805302042590175,
      "grad_norm": 1.3371496200561523,
      "learning_rate": 9.99695413509548e-05,
      "loss": 10.4061,
      "step": 11
    },
    {
      "epoch": 0.005215123859191656,
      "grad_norm": 1.3684200048446655,
      "learning_rate": 9.987820251299122e-05,
      "loss": 10.4007,
      "step": 12
    },
    {
      "epoch": 0.005649717514124294,
      "grad_norm": 1.4165489673614502,
      "learning_rate": 9.972609476841367e-05,
      "loss": 10.3896,
      "step": 13
    },
    {
      "epoch": 0.006084311169056932,
      "grad_norm": 1.4051456451416016,
      "learning_rate": 9.951340343707852e-05,
      "loss": 10.387,
      "step": 14
    },
    {
      "epoch": 0.00651890482398957,
      "grad_norm": 1.4135016202926636,
      "learning_rate": 9.924038765061042e-05,
      "loss": 10.3802,
      "step": 15
    },
    {
      "epoch": 0.006953498478922208,
      "grad_norm": 1.4162020683288574,
      "learning_rate": 9.890738003669029e-05,
      "loss": 10.3781,
      "step": 16
    },
    {
      "epoch": 0.007388092133854845,
      "grad_norm": 1.4314630031585693,
      "learning_rate": 9.851478631379982e-05,
      "loss": 10.3888,
      "step": 17
    },
    {
      "epoch": 0.007822685788787484,
      "grad_norm": 1.5304012298583984,
      "learning_rate": 9.806308479691595e-05,
      "loss": 10.3621,
      "step": 18
    },
    {
      "epoch": 0.007822685788787484,
      "eval_loss": 10.36774730682373,
      "eval_runtime": 4.3755,
      "eval_samples_per_second": 1771.438,
      "eval_steps_per_second": 55.536,
      "step": 18
    },
    {
      "epoch": 0.008257279443720122,
      "grad_norm": 1.4514573812484741,
      "learning_rate": 9.755282581475769e-05,
      "loss": 10.3745,
      "step": 19
    },
    {
      "epoch": 0.008691873098652759,
      "grad_norm": 1.5256571769714355,
      "learning_rate": 9.698463103929542e-05,
      "loss": 10.3615,
      "step": 20
    },
    {
      "epoch": 0.009126466753585397,
      "grad_norm": 1.528025507926941,
      "learning_rate": 9.635919272833938e-05,
      "loss": 10.3546,
      "step": 21
    },
    {
      "epoch": 0.009561060408518035,
      "grad_norm": 1.59959876537323,
      "learning_rate": 9.567727288213005e-05,
      "loss": 10.3554,
      "step": 22
    },
    {
      "epoch": 0.009995654063450673,
      "grad_norm": 1.6324058771133423,
      "learning_rate": 9.493970231495835e-05,
      "loss": 10.3522,
      "step": 23
    },
    {
      "epoch": 0.010430247718383311,
      "grad_norm": 1.6419366598129272,
      "learning_rate": 9.414737964294636e-05,
      "loss": 10.3531,
      "step": 24
    },
    {
      "epoch": 0.01086484137331595,
      "grad_norm": 1.518904447555542,
      "learning_rate": 9.330127018922194e-05,
      "loss": 10.3524,
      "step": 25
    },
    {
      "epoch": 0.011299435028248588,
      "grad_norm": 1.6594668626785278,
      "learning_rate": 9.24024048078213e-05,
      "loss": 10.3408,
      "step": 26
    },
    {
      "epoch": 0.011734028683181226,
      "grad_norm": 1.6574015617370605,
      "learning_rate": 9.145187862775209e-05,
      "loss": 10.3433,
      "step": 27
    },
    {
      "epoch": 0.011734028683181226,
      "eval_loss": 10.332037925720215,
      "eval_runtime": 4.4025,
      "eval_samples_per_second": 1760.591,
      "eval_steps_per_second": 55.196,
      "step": 27
    },
    {
      "epoch": 0.012168622338113864,
      "grad_norm": 1.6417022943496704,
      "learning_rate": 9.045084971874738e-05,
      "loss": 10.3352,
      "step": 28
    },
    {
      "epoch": 0.012603215993046502,
      "grad_norm": 1.6392464637756348,
      "learning_rate": 8.940053768033609e-05,
      "loss": 10.3281,
      "step": 29
    },
    {
      "epoch": 0.01303780964797914,
      "grad_norm": 1.6714566946029663,
      "learning_rate": 8.83022221559489e-05,
      "loss": 10.3287,
      "step": 30
    },
    {
      "epoch": 0.013472403302911778,
      "grad_norm": 1.6938108205795288,
      "learning_rate": 8.715724127386972e-05,
      "loss": 10.3242,
      "step": 31
    },
    {
      "epoch": 0.013906996957844416,
      "grad_norm": 1.8171216249465942,
      "learning_rate": 8.596699001693255e-05,
      "loss": 10.311,
      "step": 32
    },
    {
      "epoch": 0.014341590612777053,
      "grad_norm": 1.7566795349121094,
      "learning_rate": 8.473291852294987e-05,
      "loss": 10.3095,
      "step": 33
    },
    {
      "epoch": 0.01477618426770969,
      "grad_norm": 1.7736107110977173,
      "learning_rate": 8.345653031794292e-05,
      "loss": 10.2989,
      "step": 34
    },
    {
      "epoch": 0.015210777922642329,
      "grad_norm": 1.776149034500122,
      "learning_rate": 8.213938048432697e-05,
      "loss": 10.3029,
      "step": 35
    },
    {
      "epoch": 0.01564537157757497,
      "grad_norm": 1.7460124492645264,
      "learning_rate": 8.07830737662829e-05,
      "loss": 10.3025,
      "step": 36
    },
    {
      "epoch": 0.01564537157757497,
      "eval_loss": 10.295435905456543,
      "eval_runtime": 4.3752,
      "eval_samples_per_second": 1771.593,
      "eval_steps_per_second": 55.541,
      "step": 36
    },
    {
      "epoch": 0.016079965232507605,
      "grad_norm": 1.8232313394546509,
      "learning_rate": 7.938926261462366e-05,
      "loss": 10.2951,
      "step": 37
    },
    {
      "epoch": 0.016514558887440245,
      "grad_norm": 1.7527620792388916,
      "learning_rate": 7.795964517353735e-05,
      "loss": 10.2857,
      "step": 38
    },
    {
      "epoch": 0.01694915254237288,
      "grad_norm": 1.7404676675796509,
      "learning_rate": 7.649596321166024e-05,
      "loss": 10.2875,
      "step": 39
    },
    {
      "epoch": 0.017383746197305518,
      "grad_norm": 1.8285305500030518,
      "learning_rate": 7.500000000000001e-05,
      "loss": 10.2828,
      "step": 40
    },
    {
      "epoch": 0.017818339852238158,
      "grad_norm": 1.7420995235443115,
      "learning_rate": 7.347357813929454e-05,
      "loss": 10.2849,
      "step": 41
    },
    {
      "epoch": 0.018252933507170794,
      "grad_norm": 1.7493330240249634,
      "learning_rate": 7.191855733945387e-05,
      "loss": 10.2871,
      "step": 42
    },
    {
      "epoch": 0.018687527162103434,
      "grad_norm": 1.7411251068115234,
      "learning_rate": 7.033683215379002e-05,
      "loss": 10.2775,
      "step": 43
    },
    {
      "epoch": 0.01912212081703607,
      "grad_norm": 1.7749557495117188,
      "learning_rate": 6.873032967079561e-05,
      "loss": 10.2578,
      "step": 44
    },
    {
      "epoch": 0.01955671447196871,
      "grad_norm": 1.7583379745483398,
      "learning_rate": 6.710100716628344e-05,
      "loss": 10.2678,
      "step": 45
    },
    {
      "epoch": 0.01955671447196871,
      "eval_loss": 10.261744499206543,
      "eval_runtime": 4.3669,
      "eval_samples_per_second": 1774.935,
      "eval_steps_per_second": 55.646,
      "step": 45
    },
    {
      "epoch": 0.019991308126901346,
      "grad_norm": 1.7365025281906128,
      "learning_rate": 6.545084971874738e-05,
      "loss": 10.2571,
      "step": 46
    },
    {
      "epoch": 0.020425901781833986,
      "grad_norm": 1.7217011451721191,
      "learning_rate": 6.378186779084995e-05,
      "loss": 10.2583,
      "step": 47
    },
    {
      "epoch": 0.020860495436766623,
      "grad_norm": 1.7320549488067627,
      "learning_rate": 6.209609477998338e-05,
      "loss": 10.2667,
      "step": 48
    },
    {
      "epoch": 0.021295089091699262,
      "grad_norm": 1.7598958015441895,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 10.2522,
      "step": 49
    },
    {
      "epoch": 0.0217296827466319,
      "grad_norm": 1.7420951128005981,
      "learning_rate": 5.868240888334653e-05,
      "loss": 10.2627,
      "step": 50
    },
    {
      "epoch": 0.02216427640156454,
      "grad_norm": 1.6855006217956543,
      "learning_rate": 5.695865504800327e-05,
      "loss": 10.2587,
      "step": 51
    },
    {
      "epoch": 0.022598870056497175,
      "grad_norm": 1.6962989568710327,
      "learning_rate": 5.522642316338268e-05,
      "loss": 10.2377,
      "step": 52
    },
    {
      "epoch": 0.02303346371142981,
      "grad_norm": 1.6561306715011597,
      "learning_rate": 5.348782368720626e-05,
      "loss": 10.2395,
      "step": 53
    },
    {
      "epoch": 0.02346805736636245,
      "grad_norm": 1.6776403188705444,
      "learning_rate": 5.174497483512506e-05,
      "loss": 10.2517,
      "step": 54
    },
    {
      "epoch": 0.02346805736636245,
      "eval_loss": 10.234784126281738,
      "eval_runtime": 4.3928,
      "eval_samples_per_second": 1764.474,
      "eval_steps_per_second": 55.318,
      "step": 54
    },
    {
      "epoch": 0.023902651021295088,
      "grad_norm": 1.6507542133331299,
      "learning_rate": 5e-05,
      "loss": 10.2472,
      "step": 55
    },
    {
      "epoch": 0.024337244676227728,
      "grad_norm": 1.6575618982315063,
      "learning_rate": 4.825502516487497e-05,
      "loss": 10.2308,
      "step": 56
    },
    {
      "epoch": 0.024771838331160364,
      "grad_norm": 1.6367050409317017,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 10.2184,
      "step": 57
    },
    {
      "epoch": 0.025206431986093004,
      "grad_norm": 1.634709358215332,
      "learning_rate": 4.477357683661734e-05,
      "loss": 10.2335,
      "step": 58
    },
    {
      "epoch": 0.02564102564102564,
      "grad_norm": 1.6403002738952637,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 10.2438,
      "step": 59
    },
    {
      "epoch": 0.02607561929595828,
      "grad_norm": 1.5752891302108765,
      "learning_rate": 4.131759111665349e-05,
      "loss": 10.2285,
      "step": 60
    },
    {
      "epoch": 0.026510212950890916,
      "grad_norm": 1.5749379396438599,
      "learning_rate": 3.960441545911204e-05,
      "loss": 10.2177,
      "step": 61
    },
    {
      "epoch": 0.026944806605823556,
      "grad_norm": 1.5850154161453247,
      "learning_rate": 3.790390522001662e-05,
      "loss": 10.2273,
      "step": 62
    },
    {
      "epoch": 0.027379400260756193,
      "grad_norm": 1.5233105421066284,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 10.2221,
      "step": 63
    },
    {
      "epoch": 0.027379400260756193,
      "eval_loss": 10.216018676757812,
      "eval_runtime": 4.396,
      "eval_samples_per_second": 1763.203,
      "eval_steps_per_second": 55.278,
      "step": 63
    },
    {
      "epoch": 0.027813993915688832,
      "grad_norm": 1.5639811754226685,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 10.2026,
      "step": 64
    },
    {
      "epoch": 0.02824858757062147,
      "grad_norm": 1.541033148765564,
      "learning_rate": 3.289899283371657e-05,
      "loss": 10.215,
      "step": 65
    },
    {
      "epoch": 0.028683181225554105,
      "grad_norm": 1.4939786195755005,
      "learning_rate": 3.12696703292044e-05,
      "loss": 10.1985,
      "step": 66
    },
    {
      "epoch": 0.029117774880486745,
      "grad_norm": 1.4225854873657227,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 10.2112,
      "step": 67
    },
    {
      "epoch": 0.02955236853541938,
      "grad_norm": 1.5087547302246094,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 10.214,
      "step": 68
    },
    {
      "epoch": 0.02998696219035202,
      "grad_norm": 1.4858537912368774,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 10.2253,
      "step": 69
    },
    {
      "epoch": 0.030421555845284658,
      "grad_norm": 1.4683682918548584,
      "learning_rate": 2.500000000000001e-05,
      "loss": 10.2205,
      "step": 70
    },
    {
      "epoch": 0.030856149500217298,
      "grad_norm": 1.4585028886795044,
      "learning_rate": 2.350403678833976e-05,
      "loss": 10.2128,
      "step": 71
    },
    {
      "epoch": 0.03129074315514994,
      "grad_norm": 1.476791262626648,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 10.2148,
      "step": 72
    },
    {
      "epoch": 0.03129074315514994,
      "eval_loss": 10.20433521270752,
      "eval_runtime": 4.3833,
      "eval_samples_per_second": 1768.31,
      "eval_steps_per_second": 55.438,
      "step": 72
    },
    {
      "epoch": 0.03172533681008257,
      "grad_norm": 1.4202309846878052,
      "learning_rate": 2.061073738537635e-05,
      "loss": 10.2038,
      "step": 73
    },
    {
      "epoch": 0.03215993046501521,
      "grad_norm": 1.4229024648666382,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 10.1987,
      "step": 74
    },
    {
      "epoch": 0.03259452411994785,
      "grad_norm": 1.4023692607879639,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 10.21,
      "step": 75
    },
    {
      "epoch": 0.03302911777488049,
      "grad_norm": 1.3915892839431763,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 10.2071,
      "step": 76
    },
    {
      "epoch": 0.03346371142981312,
      "grad_norm": 1.4067327976226807,
      "learning_rate": 1.526708147705013e-05,
      "loss": 10.1902,
      "step": 77
    },
    {
      "epoch": 0.03389830508474576,
      "grad_norm": 1.4121757745742798,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 10.192,
      "step": 78
    },
    {
      "epoch": 0.0343328987396784,
      "grad_norm": 1.445264220237732,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 10.2108,
      "step": 79
    },
    {
      "epoch": 0.034767492394611035,
      "grad_norm": 1.4060300588607788,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 10.1989,
      "step": 80
    },
    {
      "epoch": 0.035202086049543675,
      "grad_norm": 1.4106123447418213,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 10.2025,
      "step": 81
    },
    {
      "epoch": 0.035202086049543675,
      "eval_loss": 10.19814395904541,
      "eval_runtime": 4.4149,
      "eval_samples_per_second": 1755.627,
      "eval_steps_per_second": 55.04,
      "step": 81
    },
    {
      "epoch": 0.035636679704476315,
      "grad_norm": 1.4134420156478882,
      "learning_rate": 9.549150281252633e-06,
      "loss": 10.2071,
      "step": 82
    },
    {
      "epoch": 0.036071273359408955,
      "grad_norm": 1.3421871662139893,
      "learning_rate": 8.548121372247918e-06,
      "loss": 10.199,
      "step": 83
    },
    {
      "epoch": 0.03650586701434159,
      "grad_norm": 1.3774632215499878,
      "learning_rate": 7.597595192178702e-06,
      "loss": 10.19,
      "step": 84
    },
    {
      "epoch": 0.03694046066927423,
      "grad_norm": 1.3491445779800415,
      "learning_rate": 6.698729810778065e-06,
      "loss": 10.1879,
      "step": 85
    },
    {
      "epoch": 0.03737505432420687,
      "grad_norm": 1.3587225675582886,
      "learning_rate": 5.852620357053651e-06,
      "loss": 10.1965,
      "step": 86
    },
    {
      "epoch": 0.03780964797913951,
      "grad_norm": 1.3620103597640991,
      "learning_rate": 5.060297685041659e-06,
      "loss": 10.1911,
      "step": 87
    },
    {
      "epoch": 0.03824424163407214,
      "grad_norm": 1.337594985961914,
      "learning_rate": 4.322727117869951e-06,
      "loss": 10.1873,
      "step": 88
    },
    {
      "epoch": 0.03867883528900478,
      "grad_norm": 1.3280407190322876,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 10.2072,
      "step": 89
    },
    {
      "epoch": 0.03911342894393742,
      "grad_norm": 1.3855839967727661,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 10.2045,
      "step": 90
    },
    {
      "epoch": 0.03911342894393742,
      "eval_loss": 10.195647239685059,
      "eval_runtime": 4.3777,
      "eval_samples_per_second": 1770.565,
      "eval_steps_per_second": 55.509,
      "step": 90
    },
    {
      "epoch": 0.03954802259887006,
      "grad_norm": 1.3364759683609009,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 10.1887,
      "step": 91
    },
    {
      "epoch": 0.03998261625380269,
      "grad_norm": 1.2921222448349,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 10.1892,
      "step": 92
    },
    {
      "epoch": 0.04041720990873533,
      "grad_norm": 1.3389850854873657,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 10.2107,
      "step": 93
    },
    {
      "epoch": 0.04085180356366797,
      "grad_norm": 1.3627759218215942,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 10.2064,
      "step": 94
    },
    {
      "epoch": 0.041286397218600605,
      "grad_norm": 1.3275415897369385,
      "learning_rate": 7.596123493895991e-07,
      "loss": 10.1928,
      "step": 95
    },
    {
      "epoch": 0.041720990873533245,
      "grad_norm": 1.3738682270050049,
      "learning_rate": 4.865965629214819e-07,
      "loss": 10.2054,
      "step": 96
    },
    {
      "epoch": 0.042155584528465885,
      "grad_norm": 1.3698922395706177,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 10.2082,
      "step": 97
    },
    {
      "epoch": 0.042590178183398525,
      "grad_norm": 1.3257871866226196,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 10.1871,
      "step": 98
    },
    {
      "epoch": 0.04302477183833116,
      "grad_norm": 1.3610293865203857,
      "learning_rate": 3.04586490452119e-08,
      "loss": 10.192,
      "step": 99
    },
    {
      "epoch": 0.04302477183833116,
      "eval_loss": 10.195563316345215,
      "eval_runtime": 4.3766,
      "eval_samples_per_second": 1771.0,
      "eval_steps_per_second": 55.522,
      "step": 99
    },
    {
      "epoch": 0.0434593654932638,
      "grad_norm": 1.345515251159668,
      "learning_rate": 0.0,
      "loss": 10.1928,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 20920978636800.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}