{ "best_metric": 0.7586850523948669, "best_model_checkpoint": "experiments/checkpoint-42", "epoch": 78.0, "global_step": 98, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.0, "learning_rate": 4.9999999999999996e-05, "loss": 0.3998, "step": 5 }, { "epoch": 8.0, "learning_rate": 9.999999999999999e-05, "loss": 0.3916, "step": 10 }, { "epoch": 11.0, "eval_loss": 1.4091475009918213, "eval_runtime": 4.4251, "eval_samples_per_second": 2.26, "eval_steps_per_second": 0.452, "step": 14 }, { "epoch": 12.0, "learning_rate": 0.00015, "loss": 0.3745, "step": 15 }, { "epoch": 16.0, "learning_rate": 0.00019999999999999998, "loss": 0.3387, "step": 20 }, { "epoch": 20.0, "learning_rate": 0.00025, "loss": 0.2894, "step": 25 }, { "epoch": 22.4, "eval_loss": 0.8969688415527344, "eval_runtime": 4.4836, "eval_samples_per_second": 2.23, "eval_steps_per_second": 0.446, "step": 28 }, { "epoch": 24.0, "learning_rate": 0.0003, "loss": 0.2322, "step": 30 }, { "epoch": 28.0, "learning_rate": 0.00027857142857142854, "loss": 0.1964, "step": 35 }, { "epoch": 32.0, "learning_rate": 0.0002571428571428571, "loss": 0.1739, "step": 40 }, { "epoch": 34.0, "eval_loss": 0.7586850523948669, "eval_runtime": 4.4178, "eval_samples_per_second": 2.264, "eval_steps_per_second": 0.453, "step": 42 }, { "epoch": 36.0, "learning_rate": 0.00023571428571428569, "loss": 0.157, "step": 45 }, { "epoch": 40.0, "learning_rate": 0.00021428571428571427, "loss": 0.1408, "step": 50 }, { "epoch": 44.0, "learning_rate": 0.00019285714285714286, "loss": 0.1264, "step": 55 }, { "epoch": 44.8, "eval_loss": 0.7922688126564026, "eval_runtime": 4.4244, "eval_samples_per_second": 2.26, "eval_steps_per_second": 0.452, "step": 56 }, { "epoch": 48.0, "learning_rate": 0.0001714285714285714, "loss": 0.1134, "step": 60 }, { "epoch": 52.0, "learning_rate": 0.00015, "loss": 0.1021, "step": 65 }, { "epoch": 56.0, "learning_rate": 0.00012857142857142855, "loss": 0.0893, "step": 70 }, { "epoch": 56.0, "eval_loss": 0.8863828778266907, "eval_runtime": 4.4142, "eval_samples_per_second": 2.265, "eval_steps_per_second": 0.453, "step": 70 }, { "epoch": 60.0, "learning_rate": 0.00010714285714285714, "loss": 0.0778, "step": 75 }, { "epoch": 64.0, "learning_rate": 8.57142857142857e-05, "loss": 0.0677, "step": 80 }, { "epoch": 67.2, "eval_loss": 1.0279890298843384, "eval_runtime": 4.4181, "eval_samples_per_second": 2.263, "eval_steps_per_second": 0.453, "step": 84 }, { "epoch": 68.0, "learning_rate": 6.428571428571427e-05, "loss": 0.0598, "step": 85 }, { "epoch": 72.0, "learning_rate": 4.285714285714285e-05, "loss": 0.0524, "step": 90 }, { "epoch": 76.0, "learning_rate": 2.1428571428571425e-05, "loss": 0.0474, "step": 95 }, { "epoch": 78.0, "eval_loss": 1.1172549724578857, "eval_runtime": 4.4004, "eval_samples_per_second": 2.273, "eval_steps_per_second": 0.455, "step": 98 } ], "max_steps": 100, "num_train_epochs": 100, "total_flos": 6.335270841090048e+16, "trial_name": null, "trial_params": null }