{ "best_metric": 0.9062915444374084, "best_model_checkpoint": "ckpt/origin_vehicle_view/pedes_rewrite/checkpoint-108", "epoch": 14.328358208955224, "eval_steps": 6, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 5e-05, "loss": 1.8347, "step": 1 }, { "epoch": 0.24, "learning_rate": 0.0001, "loss": 1.8144, "step": 2 }, { "epoch": 0.36, "learning_rate": 9.998228055617263e-05, "loss": 1.8775, "step": 3 }, { "epoch": 0.48, "learning_rate": 9.99291347838381e-05, "loss": 1.8513, "step": 4 }, { "epoch": 0.6, "learning_rate": 9.98406003515375e-05, "loss": 1.8054, "step": 5 }, { "epoch": 0.72, "learning_rate": 9.971674001050686e-05, "loss": 1.7317, "step": 6 }, { "epoch": 0.72, "eval_loss": 1.6815745830535889, "eval_runtime": 126.1045, "eval_samples_per_second": 1.467, "eval_steps_per_second": 1.467, "step": 6 }, { "epoch": 0.84, "learning_rate": 9.955764155020037e-05, "loss": 1.6507, "step": 7 }, { "epoch": 0.96, "learning_rate": 9.936341773606723e-05, "loss": 1.609, "step": 8 }, { "epoch": 1.07, "learning_rate": 9.913420622962606e-05, "loss": 1.6463, "step": 9 }, { "epoch": 1.19, "learning_rate": 9.887016949089333e-05, "loss": 1.5635, "step": 10 }, { "epoch": 1.31, "learning_rate": 9.857149466323549e-05, "loss": 1.56, "step": 11 }, { "epoch": 1.43, "learning_rate": 9.82383934407258e-05, "loss": 1.5471, "step": 12 }, { "epoch": 1.43, "eval_loss": 1.5364471673965454, "eval_runtime": 125.7151, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 12 }, { "epoch": 1.55, "learning_rate": 9.787110191810027e-05, "loss": 1.4941, "step": 13 }, { "epoch": 1.67, "learning_rate": 9.746988042341906e-05, "loss": 1.4615, "step": 14 }, { "epoch": 1.79, "learning_rate": 9.703501333355168e-05, "loss": 1.4819, "step": 15 }, { "epoch": 1.91, "learning_rate": 9.656680887261693e-05, "loss": 1.4794, "step": 16 }, { "epoch": 2.03, "learning_rate": 9.606559889352064e-05, "loss": 1.4049, "step": 17 }, { "epoch": 2.15, "learning_rate": 9.553173864274567e-05, "loss": 1.3581, "step": 18 }, { "epoch": 2.15, "eval_loss": 1.3939788341522217, "eval_runtime": 125.5936, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 18 }, { "epoch": 2.27, "learning_rate": 9.496560650856097e-05, "loss": 1.3588, "step": 19 }, { "epoch": 2.39, "learning_rate": 9.436760375282859e-05, "loss": 1.3622, "step": 20 }, { "epoch": 2.51, "learning_rate": 9.373815422659806e-05, "loss": 1.3424, "step": 21 }, { "epoch": 2.63, "learning_rate": 9.30777040696903e-05, "loss": 1.2635, "step": 22 }, { "epoch": 2.75, "learning_rate": 9.238672139448354e-05, "loss": 1.2576, "step": 23 }, { "epoch": 2.87, "learning_rate": 9.166569595412575e-05, "loss": 1.2665, "step": 24 }, { "epoch": 2.87, "eval_loss": 1.2668133974075317, "eval_runtime": 125.4051, "eval_samples_per_second": 1.475, "eval_steps_per_second": 1.475, "step": 24 }, { "epoch": 2.99, "learning_rate": 9.091513879540845e-05, "loss": 1.2343, "step": 25 }, { "epoch": 3.1, "learning_rate": 9.013558189654819e-05, "loss": 1.1665, "step": 26 }, { "epoch": 3.22, "learning_rate": 8.932757779013214e-05, "loss": 1.2102, "step": 27 }, { "epoch": 3.34, "learning_rate": 8.849169917149531e-05, "loss": 1.2002, "step": 28 }, { "epoch": 3.46, "learning_rate": 8.762853849280693e-05, "loss": 1.1412, "step": 29 }, { "epoch": 3.58, "learning_rate": 8.673870754315336e-05, "loss": 1.1299, "step": 30 }, { "epoch": 3.58, "eval_loss": 1.1677451133728027, "eval_runtime": 125.5572, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 30 }, { "epoch": 3.7, "learning_rate": 8.582283701491576e-05, "loss": 1.128, "step": 31 }, { "epoch": 3.82, "learning_rate": 8.488157605674925e-05, "loss": 1.0769, "step": 32 }, { "epoch": 3.94, "learning_rate": 8.391559181348082e-05, "loss": 1.1296, "step": 33 }, { "epoch": 4.06, "learning_rate": 8.292556895325194e-05, "loss": 1.082, "step": 34 }, { "epoch": 4.18, "learning_rate": 8.191220918224101e-05, "loss": 1.0029, "step": 35 }, { "epoch": 4.3, "learning_rate": 8.08762307473096e-05, "loss": 1.0501, "step": 36 }, { "epoch": 4.3, "eval_loss": 1.0887305736541748, "eval_runtime": 125.6531, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 36 }, { "epoch": 4.42, "learning_rate": 7.981836792692508e-05, "loss": 1.0311, "step": 37 }, { "epoch": 4.54, "learning_rate": 7.873937051072035e-05, "loss": 1.0238, "step": 38 }, { "epoch": 4.66, "learning_rate": 7.764000326805967e-05, "loss": 0.9792, "step": 39 }, { "epoch": 4.78, "learning_rate": 7.652104540598712e-05, "loss": 1.0071, "step": 40 }, { "epoch": 4.9, "learning_rate": 7.5383290016942e-05, "loss": 1.0934, "step": 41 }, { "epoch": 5.01, "learning_rate": 7.422754351663252e-05, "loss": 0.995, "step": 42 }, { "epoch": 5.01, "eval_loss": 1.0306411981582642, "eval_runtime": 125.5286, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 42 }, { "epoch": 5.13, "learning_rate": 7.30546250724663e-05, "loss": 0.9693, "step": 43 }, { "epoch": 5.25, "learning_rate": 7.186536602294278e-05, "loss": 0.9486, "step": 44 }, { "epoch": 5.37, "learning_rate": 7.066060928841892e-05, "loss": 0.9569, "step": 45 }, { "epoch": 5.49, "learning_rate": 6.944120877366604e-05, "loss": 0.9455, "step": 46 }, { "epoch": 5.61, "learning_rate": 6.820802876264112e-05, "loss": 0.9153, "step": 47 }, { "epoch": 5.73, "learning_rate": 6.696194330590151e-05, "loss": 0.929, "step": 48 }, { "epoch": 5.73, "eval_loss": 0.9906992316246033, "eval_runtime": 125.6953, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 48 }, { "epoch": 5.85, "learning_rate": 6.570383560109745e-05, "loss": 0.8984, "step": 49 }, { "epoch": 5.97, "learning_rate": 6.443459736698105e-05, "loss": 0.9365, "step": 50 }, { "epoch": 6.09, "learning_rate": 6.315512821137606e-05, "loss": 0.9218, "step": 51 }, { "epoch": 6.21, "learning_rate": 6.186633499355576e-05, "loss": 0.8942, "step": 52 }, { "epoch": 6.33, "learning_rate": 6.056913118148122e-05, "loss": 0.8714, "step": 53 }, { "epoch": 6.45, "learning_rate": 5.9264436204355724e-05, "loss": 0.8837, "step": 54 }, { "epoch": 6.45, "eval_loss": 0.9632167220115662, "eval_runtime": 125.527, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 54 }, { "epoch": 6.57, "learning_rate": 5.7953174800953604e-05, "loss": 0.8434, "step": 55 }, { "epoch": 6.69, "learning_rate": 5.6636276364186105e-05, "loss": 0.8463, "step": 56 }, { "epoch": 6.81, "learning_rate": 5.5314674282368275e-05, "loss": 0.9126, "step": 57 }, { "epoch": 6.93, "learning_rate": 5.3989305277654156e-05, "loss": 0.8758, "step": 58 }, { "epoch": 7.04, "learning_rate": 5.2661108742108935e-05, "loss": 0.8322, "step": 59 }, { "epoch": 7.16, "learning_rate": 5.133102607188874e-05, "loss": 0.854, "step": 60 }, { "epoch": 7.16, "eval_loss": 0.9417232871055603, "eval_runtime": 125.6452, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 60 }, { "epoch": 7.28, "learning_rate": 5e-05, "loss": 0.819, "step": 61 }, { "epoch": 7.4, "learning_rate": 4.866897392811126e-05, "loss": 0.7644, "step": 62 }, { "epoch": 7.52, "learning_rate": 4.7338891257891084e-05, "loss": 0.8631, "step": 63 }, { "epoch": 7.64, "learning_rate": 4.601069472234584e-05, "loss": 0.8201, "step": 64 }, { "epoch": 7.76, "learning_rate": 4.4685325717631736e-05, "loss": 0.8726, "step": 65 }, { "epoch": 7.88, "learning_rate": 4.336372363581391e-05, "loss": 0.8153, "step": 66 }, { "epoch": 7.88, "eval_loss": 0.928375244140625, "eval_runtime": 125.6028, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 66 }, { "epoch": 8.0, "learning_rate": 4.204682519904641e-05, "loss": 0.8055, "step": 67 }, { "epoch": 8.12, "learning_rate": 4.0735563795644294e-05, "loss": 0.7927, "step": 68 }, { "epoch": 8.24, "learning_rate": 3.9430868818518784e-05, "loss": 0.76, "step": 69 }, { "epoch": 8.36, "learning_rate": 3.8133665006444255e-05, "loss": 0.7788, "step": 70 }, { "epoch": 8.48, "learning_rate": 3.6844871788623945e-05, "loss": 0.7797, "step": 71 }, { "epoch": 8.6, "learning_rate": 3.556540263301896e-05, "loss": 0.8129, "step": 72 }, { "epoch": 8.6, "eval_loss": 0.9196569323539734, "eval_runtime": 125.5832, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 72 }, { "epoch": 8.72, "learning_rate": 3.429616439890258e-05, "loss": 0.8396, "step": 73 }, { "epoch": 8.84, "learning_rate": 3.303805669409848e-05, "loss": 0.7988, "step": 74 }, { "epoch": 8.96, "learning_rate": 3.179197123735889e-05, "loss": 0.8258, "step": 75 }, { "epoch": 9.07, "learning_rate": 3.055879122633397e-05, "loss": 0.7467, "step": 76 }, { "epoch": 9.19, "learning_rate": 2.9339390711581105e-05, "loss": 0.789, "step": 77 }, { "epoch": 9.31, "learning_rate": 2.8134633977057235e-05, "loss": 0.8003, "step": 78 }, { "epoch": 9.31, "eval_loss": 0.914078414440155, "eval_runtime": 125.6795, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 78 }, { "epoch": 9.43, "learning_rate": 2.69453749275337e-05, "loss": 0.7977, "step": 79 }, { "epoch": 9.55, "learning_rate": 2.5772456483367497e-05, "loss": 0.7476, "step": 80 }, { "epoch": 9.67, "learning_rate": 2.4616709983058018e-05, "loss": 0.7893, "step": 81 }, { "epoch": 9.79, "learning_rate": 2.347895459401288e-05, "loss": 0.7542, "step": 82 }, { "epoch": 9.91, "learning_rate": 2.235999673194035e-05, "loss": 0.7442, "step": 83 }, { "epoch": 10.03, "learning_rate": 2.126062948927966e-05, "loss": 0.7713, "step": 84 }, { "epoch": 10.03, "eval_loss": 0.9113681316375732, "eval_runtime": 125.6045, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 84 }, { "epoch": 10.15, "learning_rate": 2.0181632073074926e-05, "loss": 0.736, "step": 85 }, { "epoch": 10.27, "learning_rate": 1.912376925269041e-05, "loss": 0.75, "step": 86 }, { "epoch": 10.39, "learning_rate": 1.808779081775901e-05, "loss": 0.7386, "step": 87 }, { "epoch": 10.51, "learning_rate": 1.7074431046748075e-05, "loss": 0.7225, "step": 88 }, { "epoch": 10.63, "learning_rate": 1.6084408186519196e-05, "loss": 0.7509, "step": 89 }, { "epoch": 10.75, "learning_rate": 1.5118423943250771e-05, "loss": 0.7938, "step": 90 }, { "epoch": 10.75, "eval_loss": 0.9084902405738831, "eval_runtime": 125.6727, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 90 }, { "epoch": 10.87, "learning_rate": 1.4177162985084242e-05, "loss": 0.7491, "step": 91 }, { "epoch": 10.99, "learning_rate": 1.3261292456846647e-05, "loss": 0.8042, "step": 92 }, { "epoch": 11.1, "learning_rate": 1.2371461507193078e-05, "loss": 0.7312, "step": 93 }, { "epoch": 11.22, "learning_rate": 1.150830082850468e-05, "loss": 0.6995, "step": 94 }, { "epoch": 11.34, "learning_rate": 1.0672422209867878e-05, "loss": 0.7765, "step": 95 }, { "epoch": 11.46, "learning_rate": 9.864418103451828e-06, "loss": 0.718, "step": 96 }, { "epoch": 11.46, "eval_loss": 0.9067288637161255, "eval_runtime": 125.3815, "eval_samples_per_second": 1.475, "eval_steps_per_second": 1.475, "step": 96 }, { "epoch": 11.58, "learning_rate": 9.084861204591549e-06, "loss": 0.7403, "step": 97 }, { "epoch": 11.7, "learning_rate": 8.334304045874247e-06, "loss": 0.8061, "step": 98 }, { "epoch": 11.82, "learning_rate": 7.613278605516455e-06, "loss": 0.7374, "step": 99 }, { "epoch": 11.94, "learning_rate": 6.922295930309691e-06, "loss": 0.7497, "step": 100 }, { "epoch": 12.06, "learning_rate": 6.2618457734019364e-06, "loss": 0.7383, "step": 101 }, { "epoch": 12.18, "learning_rate": 5.6323962471714286e-06, "loss": 0.7519, "step": 102 }, { "epoch": 12.18, "eval_loss": 0.9065980911254883, "eval_runtime": 125.6052, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 102 }, { "epoch": 12.3, "learning_rate": 5.034393491439043e-06, "loss": 0.7509, "step": 103 }, { "epoch": 12.42, "learning_rate": 4.468261357254339e-06, "loss": 0.763, "step": 104 }, { "epoch": 12.54, "learning_rate": 3.9344011064793516e-06, "loss": 0.7143, "step": 105 }, { "epoch": 12.66, "learning_rate": 3.4331911273830784e-06, "loss": 0.7091, "step": 106 }, { "epoch": 12.78, "learning_rate": 2.9649866664483385e-06, "loss": 0.7425, "step": 107 }, { "epoch": 12.9, "learning_rate": 2.530119576580936e-06, "loss": 0.7282, "step": 108 }, { "epoch": 12.9, "eval_loss": 0.9062915444374084, "eval_runtime": 125.4352, "eval_samples_per_second": 1.475, "eval_steps_per_second": 1.475, "step": 108 }, { "epoch": 13.01, "learning_rate": 2.1288980818997275e-06, "loss": 0.7624, "step": 109 }, { "epoch": 13.13, "learning_rate": 1.7616065592742038e-06, "loss": 0.7489, "step": 110 }, { "epoch": 13.25, "learning_rate": 1.4285053367645074e-06, "loss": 0.7369, "step": 111 }, { "epoch": 13.37, "learning_rate": 1.1298305091066664e-06, "loss": 0.7228, "step": 112 }, { "epoch": 13.49, "learning_rate": 8.657937703739516e-07, "loss": 0.762, "step": 113 }, { "epoch": 13.61, "learning_rate": 6.365822639327723e-07, "loss": 0.7462, "step": 114 }, { "epoch": 13.61, "eval_loss": 0.9065557718276978, "eval_runtime": 125.8194, "eval_samples_per_second": 1.47, "eval_steps_per_second": 1.47, "step": 114 }, { "epoch": 13.73, "learning_rate": 4.423584497996458e-07, "loss": 0.7595, "step": 115 }, { "epoch": 13.85, "learning_rate": 2.8325998949314536e-07, "loss": 0.7116, "step": 116 }, { "epoch": 13.97, "learning_rate": 1.5939964846249378e-07, "loss": 0.7176, "step": 117 }, { "epoch": 14.09, "learning_rate": 7.086521616190279e-08, "loss": 0.753, "step": 118 }, { "epoch": 14.21, "learning_rate": 1.7719443827368677e-08, "loss": 0.7458, "step": 119 }, { "epoch": 14.33, "learning_rate": 0.0, "loss": 0.7689, "step": 120 }, { "epoch": 14.33, "eval_loss": 0.9066407680511475, "eval_runtime": 125.8513, "eval_samples_per_second": 1.47, "eval_steps_per_second": 1.47, "step": 120 }, { "epoch": 14.33, "step": 120, "total_flos": 4.71865270275072e+17, "train_loss": 0.9905460620919864, "train_runtime": 12301.8505, "train_samples_per_second": 0.488, "train_steps_per_second": 0.01 } ], "logging_steps": 1.0, "max_steps": 120, "num_train_epochs": 15, "save_steps": 12, "total_flos": 4.71865270275072e+17, "trial_name": null, "trial_params": null }