nttx committed on
Commit
50bea87
1 Parent(s): 8944386

Training in progress, step 172, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2efa61628380ec6f2ac140ab19d93abb9fc7b8d46f4c418dffc2b55eace6b22a
3
  size 50358592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29cc34a3cd885733d3c4faf7a97b4a5d9bf87a161d165305cf2dcf585b8d0c58
3
  size 50358592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9a45d2b89bf8b3b5abf0972bc838ff277c4043abd276af2bea1200a62239c87
3
  size 100824826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf0ca6be6ea824c85bf807fb40498462db82a8f6eadbd7efa399e0c0543f8d3
3
  size 100824826
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:848ea3c58951903fc03b92351f96f9f7af95e3edd79d2660b6228841524d09c0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d639eb250c7c55b6a29fdd69e4579d94bfaba9136e8b42af916ae34fb1964068
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa45a5ad3b1db9e5459aebacc7177d8fadf9b32be0f1806d3c11e0a76edf93bd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e21e4fb66c3880ee4f2f331927101dc991ccbe5e792512d3166ae12e4bbde2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.970888078212738,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 1.744186046511628,
5
  "eval_steps": 25,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1113,6 +1113,160 @@
1113
  "eval_samples_per_second": 6.243,
1114
  "eval_steps_per_second": 0.874,
1115
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1116
  }
1117
  ],
1118
  "logging_steps": 1,
@@ -1136,12 +1290,12 @@
1136
  "should_evaluate": false,
1137
  "should_log": false,
1138
  "should_save": true,
1139
- "should_training_stop": false
1140
  },
1141
  "attributes": {}
1142
  }
1143
  },
1144
- "total_flos": 6.539348896540262e+16,
1145
  "train_batch_size": 8,
1146
  "trial_name": null,
1147
  "trial_params": null
 
1
  {
2
  "best_metric": 0.970888078212738,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 2.0,
5
  "eval_steps": 25,
6
+ "global_step": 172,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1113
  "eval_samples_per_second": 6.243,
1114
  "eval_steps_per_second": 0.874,
1115
  "step": 150
1116
+ },
1117
+ {
1118
+ "epoch": 1.755813953488372,
1119
+ "grad_norm": 1.1184589862823486,
1120
+ "learning_rate": 1.2267583967958916e-05,
1121
+ "loss": 2.6164,
1122
+ "step": 151
1123
+ },
1124
+ {
1125
+ "epoch": 1.7674418604651163,
1126
+ "grad_norm": 0.9397222995758057,
1127
+ "learning_rate": 1.1141404609666449e-05,
1128
+ "loss": 3.2114,
1129
+ "step": 152
1130
+ },
1131
+ {
1132
+ "epoch": 1.7790697674418605,
1133
+ "grad_norm": 0.8842024803161621,
1134
+ "learning_rate": 1.0067444290199917e-05,
1135
+ "loss": 3.6752,
1136
+ "step": 153
1137
+ },
1138
+ {
1139
+ "epoch": 1.7906976744186047,
1140
+ "grad_norm": 0.9894306659698486,
1141
+ "learning_rate": 9.046106882113751e-06,
1142
+ "loss": 3.8772,
1143
+ "step": 154
1144
+ },
1145
+ {
1146
+ "epoch": 1.802325581395349,
1147
+ "grad_norm": 0.9347633123397827,
1148
+ "learning_rate": 8.07777646863746e-06,
1149
+ "loss": 3.9717,
1150
+ "step": 155
1151
+ },
1152
+ {
1153
+ "epoch": 1.8139534883720931,
1154
+ "grad_norm": 1.0060522556304932,
1155
+ "learning_rate": 7.1628171992377025e-06,
1156
+ "loss": 4.3449,
1157
+ "step": 156
1158
+ },
1159
+ {
1160
+ "epoch": 1.8255813953488373,
1161
+ "grad_norm": 0.9990627765655518,
1162
+ "learning_rate": 6.301573152676664e-06,
1163
+ "loss": 4.1012,
1164
+ "step": 157
1165
+ },
1166
+ {
1167
+ "epoch": 1.8372093023255816,
1168
+ "grad_norm": 1.0378891229629517,
1169
+ "learning_rate": 5.494368207617949e-06,
1170
+ "loss": 4.0339,
1171
+ "step": 158
1172
+ },
1173
+ {
1174
+ "epoch": 1.8488372093023255,
1175
+ "grad_norm": 1.0531872510910034,
1176
+ "learning_rate": 4.741505920829131e-06,
1177
+ "loss": 4.4799,
1178
+ "step": 159
1179
+ },
1180
+ {
1181
+ "epoch": 1.8604651162790697,
1182
+ "grad_norm": 0.9710641503334045,
1183
+ "learning_rate": 4.043269413026429e-06,
1184
+ "loss": 3.6334,
1185
+ "step": 160
1186
+ },
1187
+ {
1188
+ "epoch": 1.872093023255814,
1189
+ "grad_norm": 0.9414262771606445,
1190
+ "learning_rate": 3.3999212624046646e-06,
1191
+ "loss": 3.8207,
1192
+ "step": 161
1193
+ },
1194
+ {
1195
+ "epoch": 1.8837209302325582,
1196
+ "grad_norm": 0.8727117776870728,
1197
+ "learning_rate": 2.811703405892296e-06,
1198
+ "loss": 3.6237,
1199
+ "step": 162
1200
+ },
1201
+ {
1202
+ "epoch": 1.8953488372093024,
1203
+ "grad_norm": 0.8367646932601929,
1204
+ "learning_rate": 2.2788370481687965e-06,
1205
+ "loss": 3.5522,
1206
+ "step": 163
1207
+ },
1208
+ {
1209
+ "epoch": 1.9069767441860463,
1210
+ "grad_norm": 0.9638428688049316,
1211
+ "learning_rate": 1.801522578478648e-06,
1212
+ "loss": 3.7745,
1213
+ "step": 164
1214
+ },
1215
+ {
1216
+ "epoch": 1.9186046511627906,
1217
+ "grad_norm": 1.643754005432129,
1218
+ "learning_rate": 1.3799394952732024e-06,
1219
+ "loss": 4.3574,
1220
+ "step": 165
1221
+ },
1222
+ {
1223
+ "epoch": 1.9302325581395348,
1224
+ "grad_norm": 0.9748329520225525,
1225
+ "learning_rate": 1.0142463387085464e-06,
1226
+ "loss": 4.2242,
1227
+ "step": 166
1228
+ },
1229
+ {
1230
+ "epoch": 1.941860465116279,
1231
+ "grad_norm": 0.938869297504425,
1232
+ "learning_rate": 7.045806310251257e-07,
1233
+ "loss": 4.0438,
1234
+ "step": 167
1235
+ },
1236
+ {
1237
+ "epoch": 1.9534883720930232,
1238
+ "grad_norm": 1.0137250423431396,
1239
+ "learning_rate": 4.510588248311964e-07,
1240
+ "loss": 4.342,
1241
+ "step": 168
1242
+ },
1243
+ {
1244
+ "epoch": 1.9651162790697674,
1245
+ "grad_norm": 1.2118498086929321,
1246
+ "learning_rate": 2.5377625930977363e-07,
1247
+ "loss": 4.5787,
1248
+ "step": 169
1249
+ },
1250
+ {
1251
+ "epoch": 1.9767441860465116,
1252
+ "grad_norm": 2.177273750305176,
1253
+ "learning_rate": 1.1280712436549378e-07,
1254
+ "loss": 4.6907,
1255
+ "step": 170
1256
+ },
1257
+ {
1258
+ "epoch": 1.9883720930232558,
1259
+ "grad_norm": 0.9952888488769531,
1260
+ "learning_rate": 2.8204432724798775e-08,
1261
+ "loss": 3.898,
1262
+ "step": 171
1263
+ },
1264
+ {
1265
+ "epoch": 2.0,
1266
+ "grad_norm": 2.449486017227173,
1267
+ "learning_rate": 0.0,
1268
+ "loss": 5.0474,
1269
+ "step": 172
1270
  }
1271
  ],
1272
  "logging_steps": 1,
 
1290
  "should_evaluate": false,
1291
  "should_log": false,
1292
  "should_save": true,
1293
+ "should_training_stop": true
1294
  },
1295
  "attributes": {}
1296
  }
1297
  },
1298
+ "total_flos": 7.485613329088512e+16,
1299
  "train_batch_size": 8,
1300
  "trial_name": null,
1301
  "trial_params": null