nutorbit committed
Commit bbb6f76
1 Parent(s): 2268c92

Training in progress, step 450, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e0771c51a0ae344a14b4c2d53a26e2fc5c42fd152d30aa38d11433481c7c75b
+oid sha256:007809edd7af4f540d9ef711501d9600279a89047629f58c6f5b36c1156a331e
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05f3f7f0a4d3289288523f09ead26de946c48ea657a398d0d84cb163880b6a26
+oid sha256:34943ec29084e48b3f49590d338f41865c45622b34f694d06283ca1ce73a16b8
 size 42546196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87113e4cb84c061b6cfa020723b20af9795b52023187a5bcaf6406cd9d955f48
+oid sha256:c0d29c136a058d95ad4140ff9606955f9a05a50a3e532f1b4839b76863188a7c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fa8d596f677db656253aa29e7d5c21bde227ca39a0071d240ec6415eee1bd74
+oid sha256:d4e97d6e4a8f37abdf7b71b21660735ad9bd8fbfe143b0732cce58d9053bbb5d
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08034785896587579,
+  "epoch": 0.08507420361092731,
   "eval_steps": 1000,
-  "global_step": 425,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2557,13 +2557,163 @@
       "learning_rate": 3.1451612903225806e-05,
       "loss": 1.6885,
       "step": 425
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 3.1048387096774195e-05,
+      "loss": 1.7086,
+      "step": 426
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 3.0645161290322585e-05,
+      "loss": 1.6276,
+      "step": 427
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 3.024193548387097e-05,
+      "loss": 1.4247,
+      "step": 428
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.9838709677419357e-05,
+      "loss": 1.5147,
+      "step": 429
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.9435483870967743e-05,
+      "loss": 1.5369,
+      "step": 430
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.9032258064516133e-05,
+      "loss": 1.6678,
+      "step": 431
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.862903225806452e-05,
+      "loss": 1.6508,
+      "step": 432
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.822580645161291e-05,
+      "loss": 1.7007,
+      "step": 433
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.7822580645161288e-05,
+      "loss": 1.5613,
+      "step": 434
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.7419354838709678e-05,
+      "loss": 1.6609,
+      "step": 435
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.7016129032258064e-05,
+      "loss": 1.6067,
+      "step": 436
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.661290322580645e-05,
+      "loss": 1.6103,
+      "step": 437
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.620967741935484e-05,
+      "loss": 1.8243,
+      "step": 438
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.5806451612903226e-05,
+      "loss": 1.744,
+      "step": 439
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.5403225806451615e-05,
+      "loss": 1.7083,
+      "step": 440
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.5e-05,
+      "loss": 1.4637,
+      "step": 441
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.4596774193548387e-05,
+      "loss": 1.7277,
+      "step": 442
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.4193548387096777e-05,
+      "loss": 1.7535,
+      "step": 443
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.3790322580645163e-05,
+      "loss": 1.713,
+      "step": 444
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.338709677419355e-05,
+      "loss": 1.6814,
+      "step": 445
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.2983870967741935e-05,
+      "loss": 1.6429,
+      "step": 446
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.258064516129032e-05,
+      "loss": 1.6736,
+      "step": 447
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.217741935483871e-05,
+      "loss": 1.5785,
+      "step": 448
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 2.1774193548387097e-05,
+      "loss": 1.7577,
+      "step": 449
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 2.1370967741935487e-05,
+      "loss": 1.5823,
+      "step": 450
     }
   ],
   "logging_steps": 1,
   "max_steps": 501,
   "num_train_epochs": 1,
   "save_steps": 25,
-  "total_flos": 2.627969166011597e+16,
+  "total_flos": 2.779017123638477e+16,
   "trial_name": null,
   "trial_params": null
 }
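
The added records belong to the "log_history" list of trainer_state.json, the state file the Hugging Face Trainer writes into each checkpoint directory. A minimal sketch for inspecting the updated state after pulling this checkpoint, assuming the file sits at the path shown in the diff:

import json

# Minimal sketch: read the checkpoint's trainer state after this commit.
# The path comes from the diff above; "log_history" holds the per-step
# records (epoch, learning_rate, loss, step) added in this update.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])        # 450 after this commit
print(state["log_history"][-1])    # last logged record, i.e. step 450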