nutorbit commited on
Commit
b6ccef1
1 Parent(s): e25e647

Training in progress, step 275, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2a2797b5eb8e207d71f8697295ab040c99d9447a55cb7a45765b5fcc0018149
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d723d83d4eb8000019aebd0817af9932a2d1202b0ec4807093bf548ac146439
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b9154468633c6d826e0b060027d9b88d350bf29d70b7280b830ee0a12499b1e
3
- size 42545748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb1db2e964ec0b2c98974ec093f4bee824e1132ff949cf3e862dd0d5628c169
3
+ size 42546196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8938b73d98fc27010dbc6b77bb6e0a5a6aa78de2bac41513e8f9a1e2730ccea
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:560e09aa52f8a763465aa7c2436802878c12b49a525a49aed5ec676ef95f5745
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93ff724ef7d9ff62658c9e4239430ba619df4bfe00095bb195d6ed43c64449e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9c36987bd7f761cf696e6b1b4a757a9b4bd2c54712ce685b5c522e621e6d0d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.047263446450515174,
5
  "eval_steps": 1000,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1507,13 +1507,163 @@
1507
  "learning_rate": 0.00010201612903225808,
1508
  "loss": 1.5656,
1509
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1510
  }
1511
  ],
1512
  "logging_steps": 1,
1513
  "max_steps": 501,
1514
  "num_train_epochs": 1,
1515
  "save_steps": 25,
1516
- "total_flos": 1.5348012927320064e+16,
1517
  "trial_name": null,
1518
  "trial_params": null
1519
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.05198979109556669,
5
  "eval_steps": 1000,
6
+ "global_step": 275,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1507
  "learning_rate": 0.00010201612903225808,
1508
  "loss": 1.5656,
1509
  "step": 250
1510
+ },
1511
+ {
1512
+ "epoch": 0.05,
1513
+ "learning_rate": 0.00010161290322580646,
1514
+ "loss": 1.6925,
1515
+ "step": 251
1516
+ },
1517
+ {
1518
+ "epoch": 0.05,
1519
+ "learning_rate": 0.00010120967741935485,
1520
+ "loss": 1.5055,
1521
+ "step": 252
1522
+ },
1523
+ {
1524
+ "epoch": 0.05,
1525
+ "learning_rate": 0.00010080645161290323,
1526
+ "loss": 1.5479,
1527
+ "step": 253
1528
+ },
1529
+ {
1530
+ "epoch": 0.05,
1531
+ "learning_rate": 0.00010040322580645163,
1532
+ "loss": 1.6352,
1533
+ "step": 254
1534
+ },
1535
+ {
1536
+ "epoch": 0.05,
1537
+ "learning_rate": 0.0001,
1538
+ "loss": 1.6747,
1539
+ "step": 255
1540
+ },
1541
+ {
1542
+ "epoch": 0.05,
1543
+ "learning_rate": 9.95967741935484e-05,
1544
+ "loss": 1.7197,
1545
+ "step": 256
1546
+ },
1547
+ {
1548
+ "epoch": 0.05,
1549
+ "learning_rate": 9.919354838709678e-05,
1550
+ "loss": 1.5599,
1551
+ "step": 257
1552
+ },
1553
+ {
1554
+ "epoch": 0.05,
1555
+ "learning_rate": 9.879032258064517e-05,
1556
+ "loss": 1.4343,
1557
+ "step": 258
1558
+ },
1559
+ {
1560
+ "epoch": 0.05,
1561
+ "learning_rate": 9.838709677419355e-05,
1562
+ "loss": 1.6037,
1563
+ "step": 259
1564
+ },
1565
+ {
1566
+ "epoch": 0.05,
1567
+ "learning_rate": 9.798387096774194e-05,
1568
+ "loss": 1.6565,
1569
+ "step": 260
1570
+ },
1571
+ {
1572
+ "epoch": 0.05,
1573
+ "learning_rate": 9.758064516129033e-05,
1574
+ "loss": 1.6615,
1575
+ "step": 261
1576
+ },
1577
+ {
1578
+ "epoch": 0.05,
1579
+ "learning_rate": 9.717741935483872e-05,
1580
+ "loss": 1.6193,
1581
+ "step": 262
1582
+ },
1583
+ {
1584
+ "epoch": 0.05,
1585
+ "learning_rate": 9.677419354838711e-05,
1586
+ "loss": 1.5369,
1587
+ "step": 263
1588
+ },
1589
+ {
1590
+ "epoch": 0.05,
1591
+ "learning_rate": 9.63709677419355e-05,
1592
+ "loss": 1.7713,
1593
+ "step": 264
1594
+ },
1595
+ {
1596
+ "epoch": 0.05,
1597
+ "learning_rate": 9.596774193548387e-05,
1598
+ "loss": 1.6298,
1599
+ "step": 265
1600
+ },
1601
+ {
1602
+ "epoch": 0.05,
1603
+ "learning_rate": 9.556451612903226e-05,
1604
+ "loss": 1.4293,
1605
+ "step": 266
1606
+ },
1607
+ {
1608
+ "epoch": 0.05,
1609
+ "learning_rate": 9.516129032258065e-05,
1610
+ "loss": 1.5491,
1611
+ "step": 267
1612
+ },
1613
+ {
1614
+ "epoch": 0.05,
1615
+ "learning_rate": 9.475806451612904e-05,
1616
+ "loss": 1.6733,
1617
+ "step": 268
1618
+ },
1619
+ {
1620
+ "epoch": 0.05,
1621
+ "learning_rate": 9.435483870967743e-05,
1622
+ "loss": 1.6588,
1623
+ "step": 269
1624
+ },
1625
+ {
1626
+ "epoch": 0.05,
1627
+ "learning_rate": 9.395161290322582e-05,
1628
+ "loss": 1.5837,
1629
+ "step": 270
1630
+ },
1631
+ {
1632
+ "epoch": 0.05,
1633
+ "learning_rate": 9.35483870967742e-05,
1634
+ "loss": 1.3857,
1635
+ "step": 271
1636
+ },
1637
+ {
1638
+ "epoch": 0.05,
1639
+ "learning_rate": 9.314516129032259e-05,
1640
+ "loss": 1.6731,
1641
+ "step": 272
1642
+ },
1643
+ {
1644
+ "epoch": 0.05,
1645
+ "learning_rate": 9.274193548387096e-05,
1646
+ "loss": 1.5275,
1647
+ "step": 273
1648
+ },
1649
+ {
1650
+ "epoch": 0.05,
1651
+ "learning_rate": 9.233870967741935e-05,
1652
+ "loss": 1.5816,
1653
+ "step": 274
1654
+ },
1655
+ {
1656
+ "epoch": 0.05,
1657
+ "learning_rate": 9.193548387096774e-05,
1658
+ "loss": 1.7432,
1659
+ "step": 275
1660
  }
1661
  ],
1662
  "logging_steps": 1,
1663
  "max_steps": 501,
1664
  "num_train_epochs": 1,
1665
  "save_steps": 25,
1666
+ "total_flos": 1.6916857776734208e+16,
1667
  "trial_name": null,
1668
  "trial_params": null
1669
  }