{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0023720290336353717, "eval_steps": 9, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.488116134541487e-05, "grad_norm": 1.1179174184799194, "learning_rate": 1e-05, "loss": 2.0136, "step": 1 }, { "epoch": 9.488116134541487e-05, "eval_loss": 2.044814109802246, "eval_runtime": 1082.9405, "eval_samples_per_second": 8.196, "eval_steps_per_second": 1.025, "step": 1 }, { "epoch": 0.00018976232269082974, "grad_norm": 1.2873406410217285, "learning_rate": 2e-05, "loss": 2.2054, "step": 2 }, { "epoch": 0.00028464348403624463, "grad_norm": 1.4217164516448975, "learning_rate": 3e-05, "loss": 2.2993, "step": 3 }, { "epoch": 0.00037952464538165947, "grad_norm": 1.0913364887237549, "learning_rate": 4e-05, "loss": 1.9095, "step": 4 }, { "epoch": 0.00047440580672707437, "grad_norm": 1.1571333408355713, "learning_rate": 5e-05, "loss": 1.8157, "step": 5 }, { "epoch": 0.0005692869680724893, "grad_norm": 1.1290559768676758, "learning_rate": 6e-05, "loss": 1.6643, "step": 6 }, { "epoch": 0.000664168129417904, "grad_norm": 1.201819896697998, "learning_rate": 7e-05, "loss": 1.9168, "step": 7 }, { "epoch": 0.0007590492907633189, "grad_norm": 1.501457929611206, "learning_rate": 8e-05, "loss": 2.1298, "step": 8 }, { "epoch": 0.0008539304521087338, "grad_norm": 1.226552963256836, "learning_rate": 9e-05, "loss": 1.9444, "step": 9 }, { "epoch": 0.0008539304521087338, "eval_loss": 1.828656792640686, "eval_runtime": 1085.5057, "eval_samples_per_second": 8.177, "eval_steps_per_second": 1.023, "step": 9 }, { "epoch": 0.0009488116134541487, "grad_norm": 1.08986496925354, "learning_rate": 0.0001, "loss": 2.0429, "step": 10 }, { "epoch": 0.0010436927747995636, "grad_norm": 1.1125391721725464, "learning_rate": 9.99695413509548e-05, "loss": 1.8817, "step": 11 }, { "epoch": 0.0011385739361449785, "grad_norm": 1.5336627960205078, "learning_rate": 9.987820251299122e-05, "loss": 1.8072, "step": 12 }, { "epoch": 0.0012334550974903932, "grad_norm": 1.4077439308166504, "learning_rate": 9.972609476841367e-05, "loss": 1.8512, "step": 13 }, { "epoch": 0.001328336258835808, "grad_norm": 1.3526650667190552, "learning_rate": 9.951340343707852e-05, "loss": 1.7567, "step": 14 }, { "epoch": 0.001423217420181223, "grad_norm": 1.23054039478302, "learning_rate": 9.924038765061042e-05, "loss": 1.782, "step": 15 }, { "epoch": 0.0015180985815266379, "grad_norm": 1.219004511833191, "learning_rate": 9.890738003669029e-05, "loss": 1.7007, "step": 16 }, { "epoch": 0.0016129797428720528, "grad_norm": 0.9380444288253784, "learning_rate": 9.851478631379982e-05, "loss": 1.7286, "step": 17 }, { "epoch": 0.0017078609042174677, "grad_norm": 1.0295952558517456, "learning_rate": 9.806308479691595e-05, "loss": 1.7644, "step": 18 }, { "epoch": 0.0017078609042174677, "eval_loss": 1.6711846590042114, "eval_runtime": 1084.8063, "eval_samples_per_second": 8.182, "eval_steps_per_second": 1.023, "step": 18 }, { "epoch": 0.0018027420655628826, "grad_norm": 1.3452121019363403, "learning_rate": 9.755282581475769e-05, "loss": 1.9288, "step": 19 }, { "epoch": 0.0018976232269082975, "grad_norm": 0.9437580108642578, "learning_rate": 9.698463103929542e-05, "loss": 1.5166, "step": 20 }, { "epoch": 0.001992504388253712, "grad_norm": 0.9429516792297363, "learning_rate": 9.635919272833938e-05, "loss": 1.4955, "step": 21 }, { "epoch": 0.0020873855495991273, "grad_norm": 0.8644145727157593, "learning_rate": 9.567727288213005e-05, "loss": 1.4253, "step": 22 }, { "epoch": 0.002182266710944542, "grad_norm": 0.940532922744751, "learning_rate": 9.493970231495835e-05, "loss": 1.6322, "step": 23 }, { "epoch": 0.002277147872289957, "grad_norm": 0.9520596265792847, "learning_rate": 9.414737964294636e-05, "loss": 1.4608, "step": 24 }, { "epoch": 0.0023720290336353717, "grad_norm": 1.16845703125, "learning_rate": 9.330127018922194e-05, "loss": 1.5855, "step": 25 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.85471921553408e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }