{ "best_metric": 1.17391836643219, "best_model_checkpoint": "./outputs/checkpoint-2200", "epoch": 2.9333333333333336, "eval_steps": 100, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0002, "loss": 1.2656, "step": 100 }, { "epoch": 0.13, "eval_loss": 1.350274920463562, "eval_runtime": 546.2738, "eval_samples_per_second": 3.533, "eval_steps_per_second": 0.443, "step": 100 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 1.103, "step": 200 }, { "epoch": 0.27, "eval_loss": 1.3182251453399658, "eval_runtime": 459.6303, "eval_samples_per_second": 4.199, "eval_steps_per_second": 0.527, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 1.0834, "step": 300 }, { "epoch": 0.4, "eval_loss": 1.299042820930481, "eval_runtime": 457.0511, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.529, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 1.0678, "step": 400 }, { "epoch": 0.53, "eval_loss": 1.287185788154602, "eval_runtime": 463.8545, "eval_samples_per_second": 4.161, "eval_steps_per_second": 0.522, "step": 400 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 1.0544, "step": 500 }, { "epoch": 0.67, "eval_loss": 1.273476243019104, "eval_runtime": 461.7034, "eval_samples_per_second": 4.18, "eval_steps_per_second": 0.524, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.0495, "step": 600 }, { "epoch": 0.8, "eval_loss": 1.2602167129516602, "eval_runtime": 455.1997, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.532, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 1.0406, "step": 700 }, { "epoch": 0.93, "eval_loss": 1.2561663389205933, "eval_runtime": 449.9152, "eval_samples_per_second": 4.29, "eval_steps_per_second": 0.538, "step": 700 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 1.0282, "step": 800 }, { "epoch": 1.07, "eval_loss": 1.2437876462936401, "eval_runtime": 451.2737, "eval_samples_per_second": 4.277, "eval_steps_per_second": 0.536, "step": 800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 1.0168, "step": 900 }, { "epoch": 1.2, "eval_loss": 1.2371761798858643, "eval_runtime": 452.8697, "eval_samples_per_second": 4.262, "eval_steps_per_second": 0.534, "step": 900 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 1.0056, "step": 1000 }, { "epoch": 1.33, "eval_loss": 1.2295143604278564, "eval_runtime": 453.1971, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.534, "step": 1000 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 1.0137, "step": 1100 }, { "epoch": 1.47, "eval_loss": 1.224471092224121, "eval_runtime": 452.058, "eval_samples_per_second": 4.269, "eval_steps_per_second": 0.535, "step": 1100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.001, "step": 1200 }, { "epoch": 1.6, "eval_loss": 1.2201058864593506, "eval_runtime": 451.8545, "eval_samples_per_second": 4.271, "eval_steps_per_second": 0.536, "step": 1200 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.991, "step": 1300 }, { "epoch": 1.73, "eval_loss": 1.2112103700637817, "eval_runtime": 449.46, "eval_samples_per_second": 4.294, "eval_steps_per_second": 0.538, "step": 1300 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.9919, "step": 1400 }, { "epoch": 1.87, "eval_loss": 1.2065283060073853, "eval_runtime": 451.912, "eval_samples_per_second": 4.271, "eval_steps_per_second": 0.536, "step": 1400 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.9797, "step": 1500 }, { "epoch": 2.0, "eval_loss": 1.2011693716049194, "eval_runtime": 452.3637, "eval_samples_per_second": 4.266, "eval_steps_per_second": 0.535, "step": 1500 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.9734, "step": 1600 }, { "epoch": 2.13, "eval_loss": 1.1984320878982544, "eval_runtime": 455.5044, "eval_samples_per_second": 4.237, "eval_steps_per_second": 0.531, "step": 1600 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.9694, "step": 1700 }, { "epoch": 2.27, "eval_loss": 1.1960954666137695, "eval_runtime": 454.7892, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.532, "step": 1700 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.9693, "step": 1800 }, { "epoch": 2.4, "eval_loss": 1.1902049779891968, "eval_runtime": 453.32, "eval_samples_per_second": 4.257, "eval_steps_per_second": 0.534, "step": 1800 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.9606, "step": 1900 }, { "epoch": 2.53, "eval_loss": 1.183451771736145, "eval_runtime": 452.8791, "eval_samples_per_second": 4.262, "eval_steps_per_second": 0.534, "step": 1900 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.9541, "step": 2000 }, { "epoch": 2.67, "eval_loss": 1.1814736127853394, "eval_runtime": 456.1407, "eval_samples_per_second": 4.231, "eval_steps_per_second": 0.531, "step": 2000 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.9638, "step": 2100 }, { "epoch": 2.8, "eval_loss": 1.1796127557754517, "eval_runtime": 454.0476, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.533, "step": 2100 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.9646, "step": 2200 }, { "epoch": 2.93, "eval_loss": 1.17391836643219, "eval_runtime": 453.8066, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.533, "step": 2200 } ], "logging_steps": 100, "max_steps": 2250, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 7.321308131141222e+16, "trial_name": null, "trial_params": null }