|
{ |
|
"best_metric": 1.1208428144454956, |
|
"best_model_checkpoint": "./outputs/checkpoint-3600", |
|
"epoch": 2.6229508196721314, |
|
"eval_steps": 100, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 1.765, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.631608486175537, |
|
"eval_runtime": 430.1354, |
|
"eval_samples_per_second": 14.586, |
|
"eval_steps_per_second": 1.825, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6077, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.5870035886764526, |
|
"eval_runtime": 417.8578, |
|
"eval_samples_per_second": 15.015, |
|
"eval_steps_per_second": 1.879, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5755, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.5563029050827026, |
|
"eval_runtime": 417.8787, |
|
"eval_samples_per_second": 15.014, |
|
"eval_steps_per_second": 1.879, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5445, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.5300686359405518, |
|
"eval_runtime": 417.9846, |
|
"eval_samples_per_second": 15.01, |
|
"eval_steps_per_second": 1.878, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5114, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5074832439422607, |
|
"eval_runtime": 417.962, |
|
"eval_samples_per_second": 15.011, |
|
"eval_steps_per_second": 1.878, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4946, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.4870833158493042, |
|
"eval_runtime": 417.6568, |
|
"eval_samples_per_second": 15.022, |
|
"eval_steps_per_second": 1.88, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.472, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.4683642387390137, |
|
"eval_runtime": 417.7114, |
|
"eval_samples_per_second": 15.02, |
|
"eval_steps_per_second": 1.879, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4622, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.4501450061798096, |
|
"eval_runtime": 417.7046, |
|
"eval_samples_per_second": 15.02, |
|
"eval_steps_per_second": 1.879, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.433, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.4323909282684326, |
|
"eval_runtime": 417.8696, |
|
"eval_samples_per_second": 15.014, |
|
"eval_steps_per_second": 1.879, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4268, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.4162870645523071, |
|
"eval_runtime": 417.7667, |
|
"eval_samples_per_second": 15.018, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4125, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.4018020629882812, |
|
"eval_runtime": 417.5926, |
|
"eval_samples_per_second": 15.024, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3846, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.3857518434524536, |
|
"eval_runtime": 417.6267, |
|
"eval_samples_per_second": 15.023, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3933, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.3718016147613525, |
|
"eval_runtime": 417.2876, |
|
"eval_samples_per_second": 15.035, |
|
"eval_steps_per_second": 1.881, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3556, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.3577570915222168, |
|
"eval_runtime": 417.5078, |
|
"eval_samples_per_second": 15.027, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3316, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.3446884155273438, |
|
"eval_runtime": 417.3899, |
|
"eval_samples_per_second": 15.032, |
|
"eval_steps_per_second": 1.881, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3207, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.3314917087554932, |
|
"eval_runtime": 417.7717, |
|
"eval_samples_per_second": 15.018, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3212, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.318053960800171, |
|
"eval_runtime": 417.56, |
|
"eval_samples_per_second": 15.025, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2967, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.3063520193099976, |
|
"eval_runtime": 417.5913, |
|
"eval_samples_per_second": 15.024, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2883, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.2934125661849976, |
|
"eval_runtime": 417.7511, |
|
"eval_samples_per_second": 15.019, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.276, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.2826957702636719, |
|
"eval_runtime": 448.0598, |
|
"eval_samples_per_second": 14.003, |
|
"eval_steps_per_second": 1.752, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2659, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.2710933685302734, |
|
"eval_runtime": 417.5822, |
|
"eval_samples_per_second": 15.025, |
|
"eval_steps_per_second": 1.88, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2451, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.259511113166809, |
|
"eval_runtime": 417.6409, |
|
"eval_samples_per_second": 15.022, |
|
"eval_steps_per_second": 1.88, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2436, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.2497906684875488, |
|
"eval_runtime": 417.7302, |
|
"eval_samples_per_second": 15.019, |
|
"eval_steps_per_second": 1.879, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2412, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.2375760078430176, |
|
"eval_runtime": 417.7184, |
|
"eval_samples_per_second": 15.02, |
|
"eval_steps_per_second": 1.879, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2223, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.2266868352890015, |
|
"eval_runtime": 417.9231, |
|
"eval_samples_per_second": 15.012, |
|
"eval_steps_per_second": 1.878, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2106, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.216168999671936, |
|
"eval_runtime": 417.7189, |
|
"eval_samples_per_second": 15.02, |
|
"eval_steps_per_second": 1.879, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2034, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.2053289413452148, |
|
"eval_runtime": 417.5961, |
|
"eval_samples_per_second": 15.024, |
|
"eval_steps_per_second": 1.88, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1787, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.1963659524917603, |
|
"eval_runtime": 417.7494, |
|
"eval_samples_per_second": 15.019, |
|
"eval_steps_per_second": 1.879, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1541, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.1863473653793335, |
|
"eval_runtime": 417.6096, |
|
"eval_samples_per_second": 15.024, |
|
"eval_steps_per_second": 1.88, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1567, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.17583167552948, |
|
"eval_runtime": 417.6171, |
|
"eval_samples_per_second": 15.023, |
|
"eval_steps_per_second": 1.88, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1395, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.1677733659744263, |
|
"eval_runtime": 417.4863, |
|
"eval_samples_per_second": 15.028, |
|
"eval_steps_per_second": 1.88, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1424, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 1.1579593420028687, |
|
"eval_runtime": 417.5279, |
|
"eval_samples_per_second": 15.027, |
|
"eval_steps_per_second": 1.88, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1263, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.147441029548645, |
|
"eval_runtime": 424.7891, |
|
"eval_samples_per_second": 14.77, |
|
"eval_steps_per_second": 1.848, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1114, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.1393115520477295, |
|
"eval_runtime": 423.9015, |
|
"eval_samples_per_second": 14.801, |
|
"eval_steps_per_second": 1.852, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1116, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.1295558214187622, |
|
"eval_runtime": 417.5664, |
|
"eval_samples_per_second": 15.025, |
|
"eval_steps_per_second": 1.88, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1054, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.1208428144454956, |
|
"eval_runtime": 437.4502, |
|
"eval_samples_per_second": 14.342, |
|
"eval_steps_per_second": 1.794, |
|
"step": 3600 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.1670242076660173e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|