Federic committed on
Commit
d9a993e
1 Parent(s): eafb60b

Training in progress, step 225, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f742d415aff28fad5820b93ca995139bfbf5d48576c37a834817953b34f7e15f
3
  size 1822364248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e529757192ef5dc556b9470603ea65e3cc8c78faf5af23c6612ce22c413c212
3
  size 1822364248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b552e1af992974094092c9c4643ea82704c3039f8162149d84f54df3a12fa59b
3
  size 650683548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13527376c2b15a01b0453cda0251ca5ff7a5afcd09d8baa908668c707b8e83ff
3
  size 650683548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a803e6004d1369697efb3e5bc4932f2d5166e52e06ea951f7120e35a51ae6390
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3162581f3942edb8c96dc2f3201fd2bea68a9cc32a069e77933e7f946e387625
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:859ff0676471245c9481ca25d6d6778d1c7963c39b7877af46bb8ca30a9ead21
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1207,13 +1207,163 @@
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.4536,
1209
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "logging_steps": 1,
1213
  "max_steps": 250,
1214
  "num_train_epochs": 2,
1215
  "save_steps": 25,
1216
- "total_flos": 2.601838334381261e+16,
1217
  "trial_name": null,
1218
  "trial_params": null
1219
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8,
5
  "eval_steps": 500,
6
+ "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1207
  "learning_rate": 0.0002,
1208
  "loss": 0.4536,
1209
  "step": 200
1210
+ },
1211
+ {
1212
+ "epoch": 1.61,
1213
+ "learning_rate": 0.0002,
1214
+ "loss": 0.4501,
1215
+ "step": 201
1216
+ },
1217
+ {
1218
+ "epoch": 1.62,
1219
+ "learning_rate": 0.0002,
1220
+ "loss": 0.4893,
1221
+ "step": 202
1222
+ },
1223
+ {
1224
+ "epoch": 1.62,
1225
+ "learning_rate": 0.0002,
1226
+ "loss": 0.4231,
1227
+ "step": 203
1228
+ },
1229
+ {
1230
+ "epoch": 1.63,
1231
+ "learning_rate": 0.0002,
1232
+ "loss": 0.446,
1233
+ "step": 204
1234
+ },
1235
+ {
1236
+ "epoch": 1.64,
1237
+ "learning_rate": 0.0002,
1238
+ "loss": 0.4262,
1239
+ "step": 205
1240
+ },
1241
+ {
1242
+ "epoch": 1.65,
1243
+ "learning_rate": 0.0002,
1244
+ "loss": 0.4258,
1245
+ "step": 206
1246
+ },
1247
+ {
1248
+ "epoch": 1.66,
1249
+ "learning_rate": 0.0002,
1250
+ "loss": 0.4337,
1251
+ "step": 207
1252
+ },
1253
+ {
1254
+ "epoch": 1.66,
1255
+ "learning_rate": 0.0002,
1256
+ "loss": 0.3736,
1257
+ "step": 208
1258
+ },
1259
+ {
1260
+ "epoch": 1.67,
1261
+ "learning_rate": 0.0002,
1262
+ "loss": 0.4079,
1263
+ "step": 209
1264
+ },
1265
+ {
1266
+ "epoch": 1.68,
1267
+ "learning_rate": 0.0002,
1268
+ "loss": 0.3848,
1269
+ "step": 210
1270
+ },
1271
+ {
1272
+ "epoch": 1.69,
1273
+ "learning_rate": 0.0002,
1274
+ "loss": 0.4034,
1275
+ "step": 211
1276
+ },
1277
+ {
1278
+ "epoch": 1.7,
1279
+ "learning_rate": 0.0002,
1280
+ "loss": 0.3581,
1281
+ "step": 212
1282
+ },
1283
+ {
1284
+ "epoch": 1.7,
1285
+ "learning_rate": 0.0002,
1286
+ "loss": 0.3691,
1287
+ "step": 213
1288
+ },
1289
+ {
1290
+ "epoch": 1.71,
1291
+ "learning_rate": 0.0002,
1292
+ "loss": 0.3856,
1293
+ "step": 214
1294
+ },
1295
+ {
1296
+ "epoch": 1.72,
1297
+ "learning_rate": 0.0002,
1298
+ "loss": 0.3688,
1299
+ "step": 215
1300
+ },
1301
+ {
1302
+ "epoch": 1.73,
1303
+ "learning_rate": 0.0002,
1304
+ "loss": 0.3378,
1305
+ "step": 216
1306
+ },
1307
+ {
1308
+ "epoch": 1.74,
1309
+ "learning_rate": 0.0002,
1310
+ "loss": 0.3663,
1311
+ "step": 217
1312
+ },
1313
+ {
1314
+ "epoch": 1.74,
1315
+ "learning_rate": 0.0002,
1316
+ "loss": 0.3173,
1317
+ "step": 218
1318
+ },
1319
+ {
1320
+ "epoch": 1.75,
1321
+ "learning_rate": 0.0002,
1322
+ "loss": 0.6313,
1323
+ "step": 219
1324
+ },
1325
+ {
1326
+ "epoch": 1.76,
1327
+ "learning_rate": 0.0002,
1328
+ "loss": 0.5564,
1329
+ "step": 220
1330
+ },
1331
+ {
1332
+ "epoch": 1.77,
1333
+ "learning_rate": 0.0002,
1334
+ "loss": 0.5533,
1335
+ "step": 221
1336
+ },
1337
+ {
1338
+ "epoch": 1.78,
1339
+ "learning_rate": 0.0002,
1340
+ "loss": 0.5213,
1341
+ "step": 222
1342
+ },
1343
+ {
1344
+ "epoch": 1.78,
1345
+ "learning_rate": 0.0002,
1346
+ "loss": 0.5716,
1347
+ "step": 223
1348
+ },
1349
+ {
1350
+ "epoch": 1.79,
1351
+ "learning_rate": 0.0002,
1352
+ "loss": 0.5287,
1353
+ "step": 224
1354
+ },
1355
+ {
1356
+ "epoch": 1.8,
1357
+ "learning_rate": 0.0002,
1358
+ "loss": 0.5217,
1359
+ "step": 225
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 2,
1365
  "save_steps": 25,
1366
+ "total_flos": 2.9201598472912896e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }