nutorbit committed on
Commit 4ba07ef · 1 parent: df6e95b

Training in progress, step 225, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbad3c32c72278fec001c7077781d678cb2fd778bad07da7267a2bb333cfc817
+oid sha256:4cd3afe7a5c6f92bd8e924895f18f69469f6685aa2ef2184f059a984d1d8379d
 size 83945296
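
Only the LFS pointer (oid/size) for the adapter weights changes here; the adapter itself is what gets loaded at inference or resume time. A minimal loading sketch, assuming the usual PEFT layout (an adapter_config.json next to adapter_model.safetensors) and a placeholder base-model ID, since the base model is not named in this commit:

```python
# Sketch: load the adapter saved in this checkpoint on top of its base model.
# "BASE_MODEL_ID" is a placeholder; the base model is not recorded in this diff.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("BASE_MODEL_ID")
model = PeftModel.from_pretrained(base, "last-checkpoint")  # reads adapter_model.safetensors
```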
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d16f5ceae6943b24109cb0f735b99bdbb139479c50060547c712e2ddbf51a91b
+oid sha256:0a7b86ad7c0e6025be5bc2c8ffed0cc7dbd120f0ca26c5fd49aa808197054cda
 size 42545748
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d3f7a56ad1a475bba2d0785209c1428117c6797270fe6f67d0179354e163bdc
+oid sha256:772d5a327fbf7113e8f59462d16a41a74a6390c940f2e126e84a4dadeb8b31f6
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b88b2d08d068d7c188fcae65f7ab4eeb2b01722602970224771334b164e05369
+oid sha256:17943b933296baae03d840ff112ff0e3c38315c7cfbb8f89c292f61b690fac96
 size 1064
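
Each of the four binary files above is tracked with git-lfs, so the repository stores only a small pointer recording the object's SHA-256 (`oid`) and byte size. A minimal sketch for checking a downloaded blob against such a pointer; the blob path is illustrative (in a clone made without LFS smudging, the pointer text is the file's own content):

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Split the 'key value' lines of a git-lfs pointer into a dict."""
    return dict(line.split(" ", 1) for line in text.splitlines() if line.strip())

def verify_blob(pointer_text: str, blob_path: str) -> bool:
    """Compare a blob's size and SHA-256 against the pointer's size/oid fields."""
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].removeprefix("sha256:")
    data = Path(blob_path).read_bytes()
    return len(data) == int(fields["size"]) and hashlib.sha256(data).hexdigest() == expected_oid

# Illustrative usage with the scheduler pointer shown above.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:17943b933296baae03d840ff112ff0e3c38315c7cfbb8f89c292f61b690fac96
size 1064"""
print(verify_blob(pointer, "last-checkpoint/scheduler.pt"))
```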
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.037810757160412135,
+  "epoch": 0.042537101805463655,
   "eval_steps": 1000,
-  "global_step": 200,
+  "global_step": 225,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1207,13 +1207,163 @@
       "learning_rate": 0.0001221774193548387,
       "loss": 1.6444,
       "step": 200
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0001217741935483871,
+      "loss": 1.6056,
+      "step": 201
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00012137096774193548,
+      "loss": 1.7643,
+      "step": 202
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00012096774193548388,
+      "loss": 1.6614,
+      "step": 203
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00012056451612903226,
+      "loss": 1.4906,
+      "step": 204
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00012016129032258065,
+      "loss": 1.598,
+      "step": 205
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011975806451612903,
+      "loss": 1.6326,
+      "step": 206
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011935483870967743,
+      "loss": 1.6153,
+      "step": 207
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0001189516129032258,
+      "loss": 1.6332,
+      "step": 208
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011854838709677421,
+      "loss": 1.6477,
+      "step": 209
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011814516129032258,
+      "loss": 1.6652,
+      "step": 210
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011774193548387097,
+      "loss": 1.412,
+      "step": 211
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011733870967741935,
+      "loss": 1.5335,
+      "step": 212
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011693548387096775,
+      "loss": 1.6941,
+      "step": 213
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011653225806451613,
+      "loss": 1.5277,
+      "step": 214
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011612903225806453,
+      "loss": 1.5259,
+      "step": 215
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011572580645161291,
+      "loss": 1.8913,
+      "step": 216
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011532258064516131,
+      "loss": 1.5655,
+      "step": 217
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011491935483870969,
+      "loss": 1.4732,
+      "step": 218
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011451612903225808,
+      "loss": 1.6836,
+      "step": 219
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011411290322580645,
+      "loss": 1.7453,
+      "step": 220
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011370967741935486,
+      "loss": 1.801,
+      "step": 221
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011330645161290323,
+      "loss": 1.6863,
+      "step": 222
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011290322580645163,
+      "loss": 1.7664,
+      "step": 223
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011250000000000001,
+      "loss": 1.7441,
+      "step": 224
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011209677419354839,
+      "loss": 1.7277,
+      "step": 225
     }
   ],
   "logging_steps": 1,
   "max_steps": 501,
   "num_train_epochs": 1,
   "save_steps": 25,
-  "total_flos": 1.2368645450366976e+16,
+  "total_flos": 1.3865689032671232e+16,
   "trial_name": null,
   "trial_params": null
 }
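
For reference, trainer_state.json pins down the run's cadence: logging every step, a checkpoint every 25 steps (hence this commit at step 225), evaluation every 1000 steps, and a 501-step cap, with the learning rate stepping down by a constant ~4.03e-7 per step, consistent with a linear schedule. A minimal sketch of matching transformers TrainingArguments; anything not read from the log (output directory, peak learning rate, batch sizes, the scheduler choice) is an assumption:

```python
# Reconstruction sketch of the arguments implied by trainer_state.json.
# Only max_steps, num_train_epochs, logging_steps, save_steps, and eval_steps
# come from the log; the rest are placeholder assumptions.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="outputs",        # placeholder; not recorded in the checkpoint
    max_steps=501,               # "max_steps": 501
    num_train_epochs=1,          # "num_train_epochs": 1
    logging_steps=1,             # "logging_steps": 1
    save_steps=25,               # checkpoints at steps 200, 225, ...
    eval_steps=1000,             # "eval_steps": 1000 (only used when evaluation is enabled)
    lr_scheduler_type="linear",  # assumed from the constant per-step LR decrement
)

# Resuming from this commit's checkpoint directory would look like:
# trainer.train(resume_from_checkpoint="last-checkpoint")
```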