Federic committed on
Commit
488f0ac
1 Parent(s): 6f9dfba

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e529757192ef5dc556b9470603ea65e3cc8c78faf5af23c6612ce22c413c212
3
  size 1822364248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:508dc42434c79f91a7d49265ff32d83ecc2da7320a13c79ea5555038779e1935
3
  size 1822364248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13527376c2b15a01b0453cda0251ca5ff7a5afcd09d8baa908668c707b8e83ff
3
  size 650683548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd6a9817f9474b1499bb8c5bc9ff5ea96d3ac273cd531e12f1dd34daec7e70f1
3
  size 650683548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3162581f3942edb8c96dc2f3201fd2bea68a9cc32a069e77933e7f946e387625
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27b422f9955c71f5b3366e8b201f25ae0299d3cd4bbe89f91e7d4308a171d786
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d1650f5062195d8ee65b24ab00a137ab48cccbff41f41ba060d4208547a763c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8,
5
  "eval_steps": 500,
6
- "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1357,13 +1357,163 @@
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.5217,
1359
  "step": 225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 2,
1365
  "save_steps": 25,
1366
- "total_flos": 2.9201598472912896e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.5217,
1359
  "step": 225
1360
+ },
1361
+ {
1362
+ "epoch": 1.81,
1363
+ "learning_rate": 0.0002,
1364
+ "loss": 0.5664,
1365
+ "step": 226
1366
+ },
1367
+ {
1368
+ "epoch": 1.82,
1369
+ "learning_rate": 0.0002,
1370
+ "loss": 0.5245,
1371
+ "step": 227
1372
+ },
1373
+ {
1374
+ "epoch": 1.82,
1375
+ "learning_rate": 0.0002,
1376
+ "loss": 0.4929,
1377
+ "step": 228
1378
+ },
1379
+ {
1380
+ "epoch": 1.83,
1381
+ "learning_rate": 0.0002,
1382
+ "loss": 0.4806,
1383
+ "step": 229
1384
+ },
1385
+ {
1386
+ "epoch": 1.84,
1387
+ "learning_rate": 0.0002,
1388
+ "loss": 0.4499,
1389
+ "step": 230
1390
+ },
1391
+ {
1392
+ "epoch": 1.85,
1393
+ "learning_rate": 0.0002,
1394
+ "loss": 0.4791,
1395
+ "step": 231
1396
+ },
1397
+ {
1398
+ "epoch": 1.86,
1399
+ "learning_rate": 0.0002,
1400
+ "loss": 0.4916,
1401
+ "step": 232
1402
+ },
1403
+ {
1404
+ "epoch": 1.86,
1405
+ "learning_rate": 0.0002,
1406
+ "loss": 0.5022,
1407
+ "step": 233
1408
+ },
1409
+ {
1410
+ "epoch": 1.87,
1411
+ "learning_rate": 0.0002,
1412
+ "loss": 0.4708,
1413
+ "step": 234
1414
+ },
1415
+ {
1416
+ "epoch": 1.88,
1417
+ "learning_rate": 0.0002,
1418
+ "loss": 0.4333,
1419
+ "step": 235
1420
+ },
1421
+ {
1422
+ "epoch": 1.89,
1423
+ "learning_rate": 0.0002,
1424
+ "loss": 0.4246,
1425
+ "step": 236
1426
+ },
1427
+ {
1428
+ "epoch": 1.9,
1429
+ "learning_rate": 0.0002,
1430
+ "loss": 0.4481,
1431
+ "step": 237
1432
+ },
1433
+ {
1434
+ "epoch": 1.9,
1435
+ "learning_rate": 0.0002,
1436
+ "loss": 0.4252,
1437
+ "step": 238
1438
+ },
1439
+ {
1440
+ "epoch": 1.91,
1441
+ "learning_rate": 0.0002,
1442
+ "loss": 0.4242,
1443
+ "step": 239
1444
+ },
1445
+ {
1446
+ "epoch": 1.92,
1447
+ "learning_rate": 0.0002,
1448
+ "loss": 0.4012,
1449
+ "step": 240
1450
+ },
1451
+ {
1452
+ "epoch": 1.93,
1453
+ "learning_rate": 0.0002,
1454
+ "loss": 0.3669,
1455
+ "step": 241
1456
+ },
1457
+ {
1458
+ "epoch": 1.94,
1459
+ "learning_rate": 0.0002,
1460
+ "loss": 0.3521,
1461
+ "step": 242
1462
+ },
1463
+ {
1464
+ "epoch": 1.94,
1465
+ "learning_rate": 0.0002,
1466
+ "loss": 0.366,
1467
+ "step": 243
1468
+ },
1469
+ {
1470
+ "epoch": 1.95,
1471
+ "learning_rate": 0.0002,
1472
+ "loss": 0.3356,
1473
+ "step": 244
1474
+ },
1475
+ {
1476
+ "epoch": 1.96,
1477
+ "learning_rate": 0.0002,
1478
+ "loss": 0.3463,
1479
+ "step": 245
1480
+ },
1481
+ {
1482
+ "epoch": 1.97,
1483
+ "learning_rate": 0.0002,
1484
+ "loss": 0.3673,
1485
+ "step": 246
1486
+ },
1487
+ {
1488
+ "epoch": 1.98,
1489
+ "learning_rate": 0.0002,
1490
+ "loss": 0.3366,
1491
+ "step": 247
1492
+ },
1493
+ {
1494
+ "epoch": 1.98,
1495
+ "learning_rate": 0.0002,
1496
+ "loss": 0.3254,
1497
+ "step": 248
1498
+ },
1499
+ {
1500
+ "epoch": 1.99,
1501
+ "learning_rate": 0.0002,
1502
+ "loss": 0.3334,
1503
+ "step": 249
1504
+ },
1505
+ {
1506
+ "epoch": 2.0,
1507
+ "learning_rate": 0.0002,
1508
+ "loss": 0.4874,
1509
+ "step": 250
1510
  }
1511
  ],
1512
  "logging_steps": 1,
1513
  "max_steps": 250,
1514
  "num_train_epochs": 2,
1515
  "save_steps": 25,
1516
+ "total_flos": 3.2208467410059264e+16,
1517
  "trial_name": null,
1518
  "trial_params": null
1519
  }