llava-v1.5-7b-Posthoc / trainer_state.json
ys-zong
upload weights
3529d19
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.944,
"global_step": 138,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.9901,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 4.000000000000001e-06,
"loss": 0.9263,
"step": 2
},
{
"epoch": 0.06,
"learning_rate": 6e-06,
"loss": 0.9251,
"step": 3
},
{
"epoch": 0.09,
"learning_rate": 8.000000000000001e-06,
"loss": 0.9197,
"step": 4
},
{
"epoch": 0.11,
"learning_rate": 1e-05,
"loss": 0.8951,
"step": 5
},
{
"epoch": 0.13,
"learning_rate": 9.998605186060138e-06,
"loss": 0.8878,
"step": 6
},
{
"epoch": 0.15,
"learning_rate": 9.99442152244292e-06,
"loss": 0.8363,
"step": 7
},
{
"epoch": 0.17,
"learning_rate": 9.98745134332128e-06,
"loss": 0.8379,
"step": 8
},
{
"epoch": 0.19,
"learning_rate": 9.97769853753642e-06,
"loss": 0.2429,
"step": 9
},
{
"epoch": 0.21,
"learning_rate": 9.965168546428122e-06,
"loss": 0.8244,
"step": 10
},
{
"epoch": 0.23,
"learning_rate": 9.949868360798893e-06,
"loss": 0.8101,
"step": 11
},
{
"epoch": 0.26,
"learning_rate": 9.931806517013612e-06,
"loss": 0.863,
"step": 12
},
{
"epoch": 0.28,
"learning_rate": 9.910993092236878e-06,
"loss": 0.8458,
"step": 13
},
{
"epoch": 0.3,
"learning_rate": 9.887439698810694e-06,
"loss": 0.806,
"step": 14
},
{
"epoch": 0.32,
"learning_rate": 9.861159477775653e-06,
"loss": 0.8507,
"step": 15
},
{
"epoch": 0.34,
"learning_rate": 9.832167091539215e-06,
"loss": 0.7992,
"step": 16
},
{
"epoch": 0.36,
"learning_rate": 9.800478715695165e-06,
"loss": 0.8719,
"step": 17
},
{
"epoch": 0.38,
"learning_rate": 9.766112029998847e-06,
"loss": 0.2583,
"step": 18
},
{
"epoch": 0.41,
"learning_rate": 9.729086208503174e-06,
"loss": 0.8628,
"step": 19
},
{
"epoch": 0.43,
"learning_rate": 9.689421908860928e-06,
"loss": 0.8408,
"step": 20
},
{
"epoch": 0.45,
"learning_rate": 9.64714126079933e-06,
"loss": 0.8208,
"step": 21
},
{
"epoch": 0.47,
"learning_rate": 9.602267853773301e-06,
"loss": 0.8414,
"step": 22
},
{
"epoch": 0.49,
"learning_rate": 9.554826723804304e-06,
"loss": 0.7868,
"step": 23
},
{
"epoch": 0.51,
"learning_rate": 9.504844339512096e-06,
"loss": 0.8611,
"step": 24
},
{
"epoch": 0.53,
"learning_rate": 9.452348587347224e-06,
"loss": 0.8196,
"step": 25
},
{
"epoch": 0.55,
"learning_rate": 9.397368756032445e-06,
"loss": 0.8339,
"step": 26
},
{
"epoch": 0.58,
"learning_rate": 9.339935520221816e-06,
"loss": 0.7913,
"step": 27
},
{
"epoch": 0.6,
"learning_rate": 9.280080923386501e-06,
"loss": 0.8374,
"step": 28
},
{
"epoch": 0.62,
"learning_rate": 9.217838359936914e-06,
"loss": 0.8664,
"step": 29
},
{
"epoch": 0.64,
"learning_rate": 9.153242556591115e-06,
"loss": 0.8206,
"step": 30
},
{
"epoch": 0.66,
"learning_rate": 9.08632955299989e-06,
"loss": 0.8471,
"step": 31
},
{
"epoch": 0.68,
"learning_rate": 9.017136681639307e-06,
"loss": 0.773,
"step": 32
},
{
"epoch": 0.7,
"learning_rate": 8.94570254698197e-06,
"loss": 0.7712,
"step": 33
},
{
"epoch": 0.73,
"learning_rate": 8.872067003958597e-06,
"loss": 0.7854,
"step": 34
},
{
"epoch": 0.75,
"learning_rate": 8.796271135721944e-06,
"loss": 0.7561,
"step": 35
},
{
"epoch": 0.77,
"learning_rate": 8.71835723072545e-06,
"loss": 0.8138,
"step": 36
},
{
"epoch": 0.79,
"learning_rate": 8.638368759129433e-06,
"loss": 0.7854,
"step": 37
},
{
"epoch": 0.81,
"learning_rate": 8.556350348547978e-06,
"loss": 0.8172,
"step": 38
},
{
"epoch": 0.83,
"learning_rate": 8.472347759150044e-06,
"loss": 0.7749,
"step": 39
},
{
"epoch": 0.85,
"learning_rate": 8.386407858128707e-06,
"loss": 0.8892,
"step": 40
},
{
"epoch": 0.87,
"learning_rate": 8.298578593552737e-06,
"loss": 0.86,
"step": 41
},
{
"epoch": 0.9,
"learning_rate": 8.208908967615159e-06,
"loss": 0.773,
"step": 42
},
{
"epoch": 0.92,
"learning_rate": 8.117449009293668e-06,
"loss": 0.8129,
"step": 43
},
{
"epoch": 0.94,
"learning_rate": 8.024249746438189e-06,
"loss": 0.8166,
"step": 44
},
{
"epoch": 0.96,
"learning_rate": 7.929363177301124e-06,
"loss": 0.8203,
"step": 45
},
{
"epoch": 0.98,
"learning_rate": 7.832842241526212e-06,
"loss": 0.754,
"step": 46
},
{
"epoch": 1.0,
"learning_rate": 7.734740790612137e-06,
"loss": 0.3353,
"step": 47
},
{
"epoch": 1.02,
"learning_rate": 7.635113557867395e-06,
"loss": 0.6187,
"step": 48
},
{
"epoch": 1.05,
"learning_rate": 7.5340161278732e-06,
"loss": 0.5845,
"step": 49
},
{
"epoch": 1.07,
"learning_rate": 7.431504905471407e-06,
"loss": 0.6143,
"step": 50
},
{
"epoch": 1.09,
"learning_rate": 7.327637084294818e-06,
"loss": 0.6156,
"step": 51
},
{
"epoch": 1.11,
"learning_rate": 7.22247061485738e-06,
"loss": 0.5726,
"step": 52
},
{
"epoch": 1.13,
"learning_rate": 7.1160641722221255e-06,
"loss": 0.5536,
"step": 53
},
{
"epoch": 1.15,
"learning_rate": 7.008477123264849e-06,
"loss": 0.5926,
"step": 54
},
{
"epoch": 1.17,
"learning_rate": 6.8997694935518e-06,
"loss": 0.4939,
"step": 55
},
{
"epoch": 1.19,
"learning_rate": 6.7900019338499005e-06,
"loss": 0.1926,
"step": 56
},
{
"epoch": 1.22,
"learning_rate": 6.6792356862881144e-06,
"loss": 0.56,
"step": 57
},
{
"epoch": 1.24,
"learning_rate": 6.567532550188908e-06,
"loss": 0.5608,
"step": 58
},
{
"epoch": 1.26,
"learning_rate": 6.454954847588824e-06,
"loss": 0.5033,
"step": 59
},
{
"epoch": 1.28,
"learning_rate": 6.341565388467425e-06,
"loss": 0.4969,
"step": 60
},
{
"epoch": 1.3,
"learning_rate": 6.227427435703997e-06,
"loss": 0.547,
"step": 61
},
{
"epoch": 1.32,
"learning_rate": 6.112604669781572e-06,
"loss": 0.535,
"step": 62
},
{
"epoch": 1.34,
"learning_rate": 5.997161153257963e-06,
"loss": 0.5331,
"step": 63
},
{
"epoch": 1.37,
"learning_rate": 5.88116129502361e-06,
"loss": 0.5558,
"step": 64
},
{
"epoch": 1.39,
"learning_rate": 5.764669814366231e-06,
"loss": 0.5304,
"step": 65
},
{
"epoch": 1.41,
"learning_rate": 5.647751704862263e-06,
"loss": 0.5342,
"step": 66
},
{
"epoch": 1.43,
"learning_rate": 5.530472198115291e-06,
"loss": 0.5577,
"step": 67
},
{
"epoch": 1.45,
"learning_rate": 5.412896727361663e-06,
"loss": 0.5478,
"step": 68
},
{
"epoch": 1.47,
"learning_rate": 5.2950908909636144e-06,
"loss": 0.4553,
"step": 69
},
{
"epoch": 1.49,
"learning_rate": 5.177120415810271e-06,
"loss": 0.5004,
"step": 70
},
{
"epoch": 1.51,
"learning_rate": 5.059051120646924e-06,
"loss": 0.5255,
"step": 71
},
{
"epoch": 1.54,
"learning_rate": 4.940948879353078e-06,
"loss": 0.5618,
"step": 72
},
{
"epoch": 1.56,
"learning_rate": 4.822879584189732e-06,
"loss": 0.5546,
"step": 73
},
{
"epoch": 1.58,
"learning_rate": 4.704909109036387e-06,
"loss": 0.5701,
"step": 74
},
{
"epoch": 1.6,
"learning_rate": 4.587103272638339e-06,
"loss": 0.5303,
"step": 75
},
{
"epoch": 1.62,
"learning_rate": 4.46952780188471e-06,
"loss": 0.5049,
"step": 76
},
{
"epoch": 1.64,
"learning_rate": 4.352248295137739e-06,
"loss": 0.5607,
"step": 77
},
{
"epoch": 1.66,
"learning_rate": 4.23533018563377e-06,
"loss": 0.5129,
"step": 78
},
{
"epoch": 1.69,
"learning_rate": 4.118838704976392e-06,
"loss": 0.5127,
"step": 79
},
{
"epoch": 1.71,
"learning_rate": 4.002838846742039e-06,
"loss": 0.5433,
"step": 80
},
{
"epoch": 1.73,
"learning_rate": 3.887395330218429e-06,
"loss": 0.5042,
"step": 81
},
{
"epoch": 1.75,
"learning_rate": 3.7725725642960047e-06,
"loss": 0.5336,
"step": 82
},
{
"epoch": 1.77,
"learning_rate": 3.658434611532578e-06,
"loss": 0.5502,
"step": 83
},
{
"epoch": 1.79,
"learning_rate": 3.545045152411178e-06,
"loss": 0.5162,
"step": 84
},
{
"epoch": 1.81,
"learning_rate": 3.4324674498110956e-06,
"loss": 0.175,
"step": 85
},
{
"epoch": 1.83,
"learning_rate": 3.3207643137118872e-06,
"loss": 0.5485,
"step": 86
},
{
"epoch": 1.86,
"learning_rate": 3.2099980661501016e-06,
"loss": 0.5243,
"step": 87
},
{
"epoch": 1.88,
"learning_rate": 3.1002305064482006e-06,
"loss": 0.5174,
"step": 88
},
{
"epoch": 1.9,
"learning_rate": 2.991522876735154e-06,
"loss": 0.5354,
"step": 89
},
{
"epoch": 1.92,
"learning_rate": 2.8839358277778758e-06,
"loss": 0.5243,
"step": 90
},
{
"epoch": 1.94,
"learning_rate": 2.7775293851426233e-06,
"loss": 0.5518,
"step": 91
},
{
"epoch": 1.96,
"learning_rate": 2.6723629157051844e-06,
"loss": 0.5625,
"step": 92
},
{
"epoch": 1.98,
"learning_rate": 2.5684950945285937e-06,
"loss": 0.5083,
"step": 93
},
{
"epoch": 2.01,
"learning_rate": 2.4659838721268005e-06,
"loss": 0.2397,
"step": 94
},
{
"epoch": 2.03,
"learning_rate": 2.364886442132606e-06,
"loss": 0.4048,
"step": 95
},
{
"epoch": 2.05,
"learning_rate": 2.265259209387867e-06,
"loss": 0.3924,
"step": 96
},
{
"epoch": 2.07,
"learning_rate": 2.16715775847379e-06,
"loss": 0.3762,
"step": 97
},
{
"epoch": 2.09,
"learning_rate": 2.0706368226988772e-06,
"loss": 0.3902,
"step": 98
},
{
"epoch": 2.11,
"learning_rate": 1.9757502535618137e-06,
"loss": 0.4002,
"step": 99
},
{
"epoch": 2.13,
"learning_rate": 1.8825509907063328e-06,
"loss": 0.4317,
"step": 100
},
{
"epoch": 2.15,
"learning_rate": 1.7910910323848435e-06,
"loss": 0.4067,
"step": 101
},
{
"epoch": 2.18,
"learning_rate": 1.7014214064472646e-06,
"loss": 0.4105,
"step": 102
},
{
"epoch": 2.2,
"learning_rate": 1.6135921418712959e-06,
"loss": 0.3521,
"step": 103
},
{
"epoch": 2.22,
"learning_rate": 1.5276522408499567e-06,
"loss": 0.369,
"step": 104
},
{
"epoch": 2.24,
"learning_rate": 1.4436496514520253e-06,
"loss": 0.3684,
"step": 105
},
{
"epoch": 2.26,
"learning_rate": 1.361631240870569e-06,
"loss": 0.3558,
"step": 106
},
{
"epoch": 2.28,
"learning_rate": 1.281642769274552e-06,
"loss": 0.3689,
"step": 107
},
{
"epoch": 2.3,
"learning_rate": 1.2037288642780575e-06,
"loss": 0.3308,
"step": 108
},
{
"epoch": 2.33,
"learning_rate": 1.1279329960414047e-06,
"loss": 0.3509,
"step": 109
},
{
"epoch": 2.35,
"learning_rate": 1.0542974530180327e-06,
"loss": 0.3442,
"step": 110
},
{
"epoch": 2.37,
"learning_rate": 9.82863318360695e-07,
"loss": 0.3694,
"step": 111
},
{
"epoch": 2.39,
"learning_rate": 9.136704470001101e-07,
"loss": 0.3714,
"step": 112
},
{
"epoch": 2.41,
"learning_rate": 8.46757443408886e-07,
"loss": 0.3559,
"step": 113
},
{
"epoch": 2.43,
"learning_rate": 7.821616400630866e-07,
"loss": 0.375,
"step": 114
},
{
"epoch": 2.45,
"learning_rate": 7.199190766135001e-07,
"loss": 0.3558,
"step": 115
},
{
"epoch": 2.47,
"learning_rate": 6.600644797781847e-07,
"loss": 0.3836,
"step": 116
},
{
"epoch": 2.5,
"learning_rate": 6.026312439675553e-07,
"loss": 0.3547,
"step": 117
},
{
"epoch": 2.52,
"learning_rate": 5.476514126527771e-07,
"loss": 0.3729,
"step": 118
},
{
"epoch": 2.54,
"learning_rate": 4.951556604879049e-07,
"loss": 0.3592,
"step": 119
},
{
"epoch": 2.56,
"learning_rate": 4.4517327619569784e-07,
"loss": 0.3452,
"step": 120
},
{
"epoch": 2.58,
"learning_rate": 3.9773214622669974e-07,
"loss": 0.1724,
"step": 121
},
{
"epoch": 2.6,
"learning_rate": 3.528587392006716e-07,
"loss": 0.3604,
"step": 122
},
{
"epoch": 2.62,
"learning_rate": 3.105780911390738e-07,
"loss": 0.3588,
"step": 123
},
{
"epoch": 2.65,
"learning_rate": 2.7091379149682683e-07,
"loss": 0.289,
"step": 124
},
{
"epoch": 2.67,
"learning_rate": 2.3388797000115427e-07,
"loss": 0.1545,
"step": 125
},
{
"epoch": 2.69,
"learning_rate": 1.9952128430483718e-07,
"loss": 0.387,
"step": 126
},
{
"epoch": 2.71,
"learning_rate": 1.6783290846078714e-07,
"loss": 0.3806,
"step": 127
},
{
"epoch": 2.73,
"learning_rate": 1.388405222243472e-07,
"loss": 0.3519,
"step": 128
},
{
"epoch": 2.75,
"learning_rate": 1.1256030118930727e-07,
"loss": 0.3362,
"step": 129
},
{
"epoch": 2.77,
"learning_rate": 8.900690776312282e-08,
"loss": 0.3886,
"step": 130
},
{
"epoch": 2.79,
"learning_rate": 6.819348298638839e-08,
"loss": 0.3294,
"step": 131
},
{
"epoch": 2.82,
"learning_rate": 5.013163920110864e-08,
"loss": 0.3649,
"step": 132
},
{
"epoch": 2.84,
"learning_rate": 3.483145357187967e-08,
"loss": 0.3504,
"step": 133
},
{
"epoch": 2.86,
"learning_rate": 2.230146246358256e-08,
"loss": 0.3427,
"step": 134
},
{
"epoch": 2.88,
"learning_rate": 1.2548656678721404e-08,
"loss": 0.3589,
"step": 135
},
{
"epoch": 2.9,
"learning_rate": 5.578477557081074e-09,
"loss": 0.3562,
"step": 136
},
{
"epoch": 2.92,
"learning_rate": 1.3948139398628492e-09,
"loss": 0.3825,
"step": 137
},
{
"epoch": 2.94,
"learning_rate": 0.0,
"loss": 0.3726,
"step": 138
},
{
"epoch": 2.94,
"step": 138,
"total_flos": 280987049787392.0,
"train_loss": 0.5631573870778084,
"train_runtime": 3550.0224,
"train_samples_per_second": 5.07,
"train_steps_per_second": 0.039
}
],
"max_steps": 138,
"num_train_epochs": 3,
"total_flos": 280987049787392.0,
"trial_name": null,
"trial_params": null
}