nutorbit committed on
Commit
62aca38
1 Parent(s): 539ab32

Training in progress, step 475, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:007809edd7af4f540d9ef711501d9600279a89047629f58c6f5b36c1156a331e
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc3524aa66c0624c20b7ed75f0239732b9a0d674504063ee93ca37e881f117d7
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34943ec29084e48b3f49590d338f41865c45622b34f694d06283ca1ce73a16b8
3
  size 42546196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:289f7545377a6902ccc671e39a89e786cef894b58b529dadb726c9ebb9b6db41
3
  size 42546196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0d29c136a058d95ad4140ff9606955f9a05a50a3e532f1b4839b76863188a7c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d04fa6ba62e6d2693994357040d1f8b0df1a2292ac61d59108919d588e97608
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4e97d6e4a8f37abdf7b71b21660735ad9bd8fbfe143b0732cce58d9053bbb5d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829afd908950b55f19acd29cd95ce4b7eeb6e3a96dff858cc79db502814ed864
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.08507420361092731,
5
  "eval_steps": 1000,
6
- "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2707,13 +2707,163 @@
2707
  "learning_rate": 2.1370967741935487e-05,
2708
  "loss": 1.5823,
2709
  "step": 450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2710
  }
2711
  ],
2712
  "logging_steps": 1,
2713
  "max_steps": 501,
2714
  "num_train_epochs": 1,
2715
  "save_steps": 25,
2716
- "total_flos": 2.779017123638477e+16,
2717
  "trial_name": null,
2718
  "trial_params": null
2719
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08980054825597883,
5
  "eval_steps": 1000,
6
+ "global_step": 475,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2707
  "learning_rate": 2.1370967741935487e-05,
2708
  "loss": 1.5823,
2709
  "step": 450
2710
+ },
2711
+ {
2712
+ "epoch": 0.09,
2713
+ "learning_rate": 2.0967741935483873e-05,
2714
+ "loss": 1.502,
2715
+ "step": 451
2716
+ },
2717
+ {
2718
+ "epoch": 0.09,
2719
+ "learning_rate": 2.056451612903226e-05,
2720
+ "loss": 1.4476,
2721
+ "step": 452
2722
+ },
2723
+ {
2724
+ "epoch": 0.09,
2725
+ "learning_rate": 2.0161290322580645e-05,
2726
+ "loss": 1.3888,
2727
+ "step": 453
2728
+ },
2729
+ {
2730
+ "epoch": 0.09,
2731
+ "learning_rate": 1.975806451612903e-05,
2732
+ "loss": 1.4592,
2733
+ "step": 454
2734
+ },
2735
+ {
2736
+ "epoch": 0.09,
2737
+ "learning_rate": 1.935483870967742e-05,
2738
+ "loss": 1.489,
2739
+ "step": 455
2740
+ },
2741
+ {
2742
+ "epoch": 0.09,
2743
+ "learning_rate": 1.8951612903225807e-05,
2744
+ "loss": 1.6278,
2745
+ "step": 456
2746
+ },
2747
+ {
2748
+ "epoch": 0.09,
2749
+ "learning_rate": 1.8548387096774193e-05,
2750
+ "loss": 1.7016,
2751
+ "step": 457
2752
+ },
2753
+ {
2754
+ "epoch": 0.09,
2755
+ "learning_rate": 1.8145161290322583e-05,
2756
+ "loss": 1.622,
2757
+ "step": 458
2758
+ },
2759
+ {
2760
+ "epoch": 0.09,
2761
+ "learning_rate": 1.774193548387097e-05,
2762
+ "loss": 1.5188,
2763
+ "step": 459
2764
+ },
2765
+ {
2766
+ "epoch": 0.09,
2767
+ "learning_rate": 1.733870967741936e-05,
2768
+ "loss": 1.4665,
2769
+ "step": 460
2770
+ },
2771
+ {
2772
+ "epoch": 0.09,
2773
+ "learning_rate": 1.693548387096774e-05,
2774
+ "loss": 1.6974,
2775
+ "step": 461
2776
+ },
2777
+ {
2778
+ "epoch": 0.09,
2779
+ "learning_rate": 1.653225806451613e-05,
2780
+ "loss": 1.6466,
2781
+ "step": 462
2782
+ },
2783
+ {
2784
+ "epoch": 0.09,
2785
+ "learning_rate": 1.6129032258064517e-05,
2786
+ "loss": 1.6057,
2787
+ "step": 463
2788
+ },
2789
+ {
2790
+ "epoch": 0.09,
2791
+ "learning_rate": 1.5725806451612903e-05,
2792
+ "loss": 1.489,
2793
+ "step": 464
2794
+ },
2795
+ {
2796
+ "epoch": 0.09,
2797
+ "learning_rate": 1.5322580645161292e-05,
2798
+ "loss": 1.7375,
2799
+ "step": 465
2800
+ },
2801
+ {
2802
+ "epoch": 0.09,
2803
+ "learning_rate": 1.4919354838709679e-05,
2804
+ "loss": 1.5686,
2805
+ "step": 466
2806
+ },
2807
+ {
2808
+ "epoch": 0.09,
2809
+ "learning_rate": 1.4516129032258066e-05,
2810
+ "loss": 1.382,
2811
+ "step": 467
2812
+ },
2813
+ {
2814
+ "epoch": 0.09,
2815
+ "learning_rate": 1.4112903225806454e-05,
2816
+ "loss": 1.7037,
2817
+ "step": 468
2818
+ },
2819
+ {
2820
+ "epoch": 0.09,
2821
+ "learning_rate": 1.3709677419354839e-05,
2822
+ "loss": 1.5521,
2823
+ "step": 469
2824
+ },
2825
+ {
2826
+ "epoch": 0.09,
2827
+ "learning_rate": 1.3306451612903225e-05,
2828
+ "loss": 1.3172,
2829
+ "step": 470
2830
+ },
2831
+ {
2832
+ "epoch": 0.09,
2833
+ "learning_rate": 1.2903225806451613e-05,
2834
+ "loss": 1.4997,
2835
+ "step": 471
2836
+ },
2837
+ {
2838
+ "epoch": 0.09,
2839
+ "learning_rate": 1.25e-05,
2840
+ "loss": 1.7076,
2841
+ "step": 472
2842
+ },
2843
+ {
2844
+ "epoch": 0.09,
2845
+ "learning_rate": 1.2096774193548388e-05,
2846
+ "loss": 1.432,
2847
+ "step": 473
2848
+ },
2849
+ {
2850
+ "epoch": 0.09,
2851
+ "learning_rate": 1.1693548387096775e-05,
2852
+ "loss": 1.8895,
2853
+ "step": 474
2854
+ },
2855
+ {
2856
+ "epoch": 0.09,
2857
+ "learning_rate": 1.129032258064516e-05,
2858
+ "loss": 1.536,
2859
+ "step": 475
2860
  }
2861
  ],
2862
  "logging_steps": 1,
2863
  "max_steps": 501,
2864
  "num_train_epochs": 1,
2865
  "save_steps": 25,
2866
+ "total_flos": 2.920625653658419e+16,
2867
  "trial_name": null,
2868
  "trial_params": null
2869
  }