{ "best_metric": 1.1196767091751099, "best_model_checkpoint": "/data1/attanasiog/safetune/checkpoint-800", "epoch": 0.9997547216090262, "global_step": 1019, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 2e-05, "loss": 2.876, "step": 25 }, { "epoch": 0.05, "learning_rate": 2e-05, "loss": 2.2203, "step": 50 }, { "epoch": 0.05, "eval_loss": 1.828871488571167, "eval_mse": 1.8288714481969155, "eval_runtime": 227.9796, "eval_samples_per_second": 15.896, "eval_steps_per_second": 3.974, "step": 50 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 1.785, "step": 75 }, { "epoch": 0.1, "learning_rate": 2e-05, "loss": 1.8997, "step": 100 }, { "epoch": 0.1, "eval_loss": 1.751622200012207, "eval_mse": 1.751622242974068, "eval_runtime": 228.0631, "eval_samples_per_second": 15.89, "eval_steps_per_second": 3.973, "step": 100 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 1.3864, "step": 125 }, { "epoch": 0.15, "learning_rate": 2e-05, "loss": 1.4082, "step": 150 }, { "epoch": 0.15, "eval_loss": 1.395007848739624, "eval_mse": 1.395007782705568, "eval_runtime": 227.9405, "eval_samples_per_second": 15.899, "eval_steps_per_second": 3.975, "step": 150 }, { "epoch": 0.17, "learning_rate": 2e-05, "loss": 1.5694, "step": 175 }, { "epoch": 0.2, "learning_rate": 2e-05, "loss": 1.5899, "step": 200 }, { "epoch": 0.2, "eval_loss": 1.9590266942977905, "eval_mse": 1.9590266676369008, "eval_runtime": 227.3633, "eval_samples_per_second": 15.939, "eval_steps_per_second": 3.985, "step": 200 }, { "epoch": 0.22, "learning_rate": 2e-05, "loss": 1.4847, "step": 225 }, { "epoch": 0.25, "learning_rate": 2e-05, "loss": 1.3633, "step": 250 }, { "epoch": 0.25, "eval_loss": 1.3315744400024414, "eval_mse": 1.3315745865311475, "eval_runtime": 227.6165, "eval_samples_per_second": 15.922, "eval_steps_per_second": 3.98, "step": 250 }, { "epoch": 0.27, "learning_rate": 2e-05, "loss": 1.3815, "step": 275 }, { "epoch": 0.29, "learning_rate": 2e-05, "loss": 1.3758, "step": 300 }, { "epoch": 0.29, "eval_loss": 1.2860150337219238, "eval_mse": 1.2860150669160453, "eval_runtime": 227.7122, "eval_samples_per_second": 15.915, "eval_steps_per_second": 3.979, "step": 300 }, { "epoch": 0.32, "learning_rate": 2e-05, "loss": 1.224, "step": 325 }, { "epoch": 0.34, "learning_rate": 2e-05, "loss": 1.3339, "step": 350 }, { "epoch": 0.34, "eval_loss": 1.269413709640503, "eval_mse": 1.2694137826567282, "eval_runtime": 227.6275, "eval_samples_per_second": 15.921, "eval_steps_per_second": 3.98, "step": 350 }, { "epoch": 0.37, "learning_rate": 2e-05, "loss": 1.3231, "step": 375 }, { "epoch": 0.39, "learning_rate": 2e-05, "loss": 1.2831, "step": 400 }, { "epoch": 0.39, "eval_loss": 1.3048381805419922, "eval_mse": 1.304838097239528, "eval_runtime": 227.5334, "eval_samples_per_second": 15.927, "eval_steps_per_second": 3.982, "step": 400 }, { "epoch": 0.42, "learning_rate": 2e-05, "loss": 1.2382, "step": 425 }, { "epoch": 0.44, "learning_rate": 2e-05, "loss": 1.2928, "step": 450 }, { "epoch": 0.44, "eval_loss": 1.239485263824463, "eval_mse": 1.2394852607350224, "eval_runtime": 227.856, "eval_samples_per_second": 15.905, "eval_steps_per_second": 3.976, "step": 450 }, { "epoch": 0.47, "learning_rate": 2e-05, "loss": 1.1165, "step": 475 }, { "epoch": 0.49, "learning_rate": 2e-05, "loss": 1.2506, "step": 500 }, { "epoch": 0.49, "eval_loss": 1.4315423965454102, "eval_mse": 1.431542415078766, "eval_runtime": 227.6843, "eval_samples_per_second": 15.917, "eval_steps_per_second": 3.979, "step": 500 }, { "epoch": 0.52, "learning_rate": 2e-05, "loss": 1.3249, "step": 525 }, { "epoch": 0.54, "learning_rate": 2e-05, "loss": 1.204, "step": 550 }, { "epoch": 0.54, "eval_loss": 1.1596423387527466, "eval_mse": 1.1596423544220553, "eval_runtime": 228.0433, "eval_samples_per_second": 15.892, "eval_steps_per_second": 3.973, "step": 550 }, { "epoch": 0.56, "learning_rate": 2e-05, "loss": 1.1547, "step": 575 }, { "epoch": 0.59, "learning_rate": 2e-05, "loss": 1.1749, "step": 600 }, { "epoch": 0.59, "eval_loss": 1.199515461921692, "eval_mse": 1.1995155261127237, "eval_runtime": 227.6846, "eval_samples_per_second": 15.917, "eval_steps_per_second": 3.979, "step": 600 }, { "epoch": 0.61, "learning_rate": 2e-05, "loss": 1.1577, "step": 625 }, { "epoch": 0.64, "learning_rate": 2e-05, "loss": 1.134, "step": 650 }, { "epoch": 0.64, "eval_loss": 1.3781769275665283, "eval_mse": 1.378176859542341, "eval_runtime": 227.5773, "eval_samples_per_second": 15.924, "eval_steps_per_second": 3.981, "step": 650 }, { "epoch": 0.66, "learning_rate": 2e-05, "loss": 1.1334, "step": 675 }, { "epoch": 0.69, "learning_rate": 2e-05, "loss": 1.3097, "step": 700 }, { "epoch": 0.69, "eval_loss": 1.1866751909255981, "eval_mse": 1.1866752670053082, "eval_runtime": 227.5406, "eval_samples_per_second": 15.927, "eval_steps_per_second": 3.982, "step": 700 }, { "epoch": 0.71, "learning_rate": 2e-05, "loss": 1.2474, "step": 725 }, { "epoch": 0.74, "learning_rate": 2e-05, "loss": 1.29, "step": 750 }, { "epoch": 0.74, "eval_loss": 1.2024071216583252, "eval_mse": 1.2024071690881728, "eval_runtime": 227.8015, "eval_samples_per_second": 15.909, "eval_steps_per_second": 3.977, "step": 750 }, { "epoch": 0.76, "learning_rate": 2e-05, "loss": 1.2337, "step": 775 }, { "epoch": 0.78, "learning_rate": 2e-05, "loss": 1.1575, "step": 800 }, { "epoch": 0.78, "eval_loss": 1.1196767091751099, "eval_mse": 1.1196766412049022, "eval_runtime": 227.7925, "eval_samples_per_second": 15.909, "eval_steps_per_second": 3.977, "step": 800 }, { "epoch": 0.81, "learning_rate": 2e-05, "loss": 1.2553, "step": 825 }, { "epoch": 0.83, "learning_rate": 2e-05, "loss": 1.2148, "step": 850 }, { "epoch": 0.83, "eval_loss": 1.1944221258163452, "eval_mse": 1.1944222031625475, "eval_runtime": 226.9861, "eval_samples_per_second": 15.966, "eval_steps_per_second": 3.991, "step": 850 }, { "epoch": 0.86, "learning_rate": 2e-05, "loss": 1.211, "step": 875 }, { "epoch": 0.88, "learning_rate": 2e-05, "loss": 1.1597, "step": 900 }, { "epoch": 0.88, "eval_loss": 1.2022933959960938, "eval_mse": 1.202293462029616, "eval_runtime": 227.036, "eval_samples_per_second": 15.962, "eval_steps_per_second": 3.991, "step": 900 }, { "epoch": 0.91, "learning_rate": 2e-05, "loss": 1.193, "step": 925 }, { "epoch": 0.93, "learning_rate": 2e-05, "loss": 1.1422, "step": 950 }, { "epoch": 0.93, "eval_loss": 1.1545809507369995, "eval_mse": 1.1545808871136407, "eval_runtime": 226.9798, "eval_samples_per_second": 15.966, "eval_steps_per_second": 3.992, "step": 950 }, { "epoch": 0.96, "learning_rate": 2e-05, "loss": 1.1564, "step": 975 }, { "epoch": 0.98, "learning_rate": 2e-05, "loss": 1.0734, "step": 1000 }, { "epoch": 0.98, "eval_loss": 1.2593152523040771, "eval_mse": 1.2593151942043517, "eval_runtime": 226.9727, "eval_samples_per_second": 15.967, "eval_steps_per_second": 3.992, "step": 1000 }, { "epoch": 1.0, "step": 1019, "total_flos": 3.038852037112627e+16, "train_loss": 1.3573072581342673, "train_runtime": 8104.8878, "train_samples_per_second": 4.024, "train_steps_per_second": 0.126 } ], "max_steps": 1019, "num_train_epochs": 1, "total_flos": 3.038852037112627e+16, "trial_name": null, "trial_params": null }