{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6298815822625347, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 9.5e-05, "loss": 37.4397, "step": 500 }, { "epoch": 0.03, "eval_loss": 31.211898803710938, "eval_runtime": 6.944, "eval_samples_per_second": 14.401, "eval_steps_per_second": 3.6, "step": 500 }, { "epoch": 0.06, "learning_rate": 9e-05, "loss": 33.7011, "step": 1000 }, { "epoch": 0.06, "eval_loss": 30.628690719604492, "eval_runtime": 8.8362, "eval_samples_per_second": 11.317, "eval_steps_per_second": 2.829, "step": 1000 }, { "epoch": 0.09, "learning_rate": 8.5e-05, "loss": 34.3949, "step": 1500 }, { "epoch": 0.09, "eval_loss": 30.2351016998291, "eval_runtime": 9.7911, "eval_samples_per_second": 10.213, "eval_steps_per_second": 2.553, "step": 1500 }, { "epoch": 0.13, "learning_rate": 8e-05, "loss": 33.7534, "step": 2000 }, { "epoch": 0.13, "eval_loss": 30.151206970214844, "eval_runtime": 9.0893, "eval_samples_per_second": 11.002, "eval_steps_per_second": 2.75, "step": 2000 }, { "epoch": 0.16, "learning_rate": 7.500000000000001e-05, "loss": 33.9418, "step": 2500 }, { "epoch": 0.16, "eval_loss": 30.117929458618164, "eval_runtime": 7.9747, "eval_samples_per_second": 12.54, "eval_steps_per_second": 3.135, "step": 2500 }, { "epoch": 0.19, "learning_rate": 7e-05, "loss": 33.3573, "step": 3000 }, { "epoch": 0.19, "eval_loss": 29.591096878051758, "eval_runtime": 7.5459, "eval_samples_per_second": 13.252, "eval_steps_per_second": 3.313, "step": 3000 }, { "epoch": 0.22, "learning_rate": 6.500000000000001e-05, "loss": 33.3372, "step": 3500 }, { "epoch": 0.22, "eval_loss": 29.37584686279297, "eval_runtime": 9.1309, "eval_samples_per_second": 10.952, "eval_steps_per_second": 2.738, "step": 3500 }, { "epoch": 0.25, "learning_rate": 6e-05, "loss": 32.5095, "step": 4000 }, { "epoch": 0.25, "eval_loss": 29.21722412109375, "eval_runtime": 8.935, "eval_samples_per_second": 11.192, "eval_steps_per_second": 2.798, "step": 4000 }, { "epoch": 0.28, "learning_rate": 5.500000000000001e-05, "loss": 32.6192, "step": 4500 }, { "epoch": 0.28, "eval_loss": 28.85555648803711, "eval_runtime": 8.0464, "eval_samples_per_second": 12.428, "eval_steps_per_second": 3.107, "step": 4500 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 32.902, "step": 5000 }, { "epoch": 0.31, "eval_loss": 28.798521041870117, "eval_runtime": 8.3773, "eval_samples_per_second": 11.937, "eval_steps_per_second": 2.984, "step": 5000 }, { "epoch": 0.35, "learning_rate": 4.5e-05, "loss": 33.2756, "step": 5500 }, { "epoch": 0.35, "eval_loss": 28.811630249023438, "eval_runtime": 8.4098, "eval_samples_per_second": 11.891, "eval_steps_per_second": 2.973, "step": 5500 }, { "epoch": 0.38, "learning_rate": 4e-05, "loss": 32.3762, "step": 6000 }, { "epoch": 0.38, "eval_loss": 28.51260757446289, "eval_runtime": 8.6441, "eval_samples_per_second": 11.569, "eval_steps_per_second": 2.892, "step": 6000 }, { "epoch": 0.41, "learning_rate": 3.5e-05, "loss": 32.5452, "step": 6500 }, { "epoch": 0.41, "eval_loss": 28.600557327270508, "eval_runtime": 7.2739, "eval_samples_per_second": 13.748, "eval_steps_per_second": 3.437, "step": 6500 }, { "epoch": 0.44, "learning_rate": 3e-05, "loss": 33.0182, "step": 7000 }, { "epoch": 0.44, "eval_loss": 28.495351791381836, "eval_runtime": 6.2937, "eval_samples_per_second": 15.889, "eval_steps_per_second": 3.972, "step": 7000 }, { "epoch": 0.47, "learning_rate": 2.5e-05, "loss": 31.8452, "step": 7500 }, { "epoch": 0.47, "eval_loss": 28.361299514770508, "eval_runtime": 8.0596, "eval_samples_per_second": 12.407, "eval_steps_per_second": 3.102, "step": 7500 }, { "epoch": 0.5, "learning_rate": 2e-05, "loss": 32.0009, "step": 8000 }, { "epoch": 0.5, "eval_loss": 28.153642654418945, "eval_runtime": 6.3145, "eval_samples_per_second": 15.837, "eval_steps_per_second": 3.959, "step": 8000 }, { "epoch": 0.54, "learning_rate": 1.5e-05, "loss": 33.4938, "step": 8500 }, { "epoch": 0.54, "eval_loss": 28.087358474731445, "eval_runtime": 6.3508, "eval_samples_per_second": 15.746, "eval_steps_per_second": 3.937, "step": 8500 }, { "epoch": 0.57, "learning_rate": 1e-05, "loss": 31.9753, "step": 9000 }, { "epoch": 0.57, "eval_loss": 28.113698959350586, "eval_runtime": 9.8066, "eval_samples_per_second": 10.197, "eval_steps_per_second": 2.549, "step": 9000 }, { "epoch": 0.6, "learning_rate": 5e-06, "loss": 31.9571, "step": 9500 }, { "epoch": 0.6, "eval_loss": 28.042316436767578, "eval_runtime": 9.4344, "eval_samples_per_second": 10.6, "eval_steps_per_second": 2.65, "step": 9500 }, { "epoch": 0.63, "learning_rate": 0.0, "loss": 31.8931, "step": 10000 }, { "epoch": 0.63, "eval_loss": 28.017412185668945, "eval_runtime": 9.3747, "eval_samples_per_second": 10.667, "eval_steps_per_second": 2.667, "step": 10000 } ], "logging_steps": 500, "max_steps": 10000, "num_train_epochs": 1, "save_steps": 5000, "total_flos": 1.369514704896e+17, "trial_name": null, "trial_params": null }