nutorbit commited on
Commit
73fae74
1 Parent(s): b8b2dbb

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1efd90182dfa6a81ee47d9232456689b3603b96f32f042109f9b660e92bd1d92
3
  size 72673016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5aa386a2c9834888591403fce04fe54adfa1b43b9303a7d60a963d38ab108b0
3
  size 72673016
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811fb76be471b2d061d804906e97d50c54cb2cf42f67f6d42e24bb576f4155dc
3
  size 36892564
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d730b214272ab23149f7fdff1c73ce01ac20891661aebf4d80825d10e2e10d
3
  size 36892564
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c54609d4e8cb89282e95eaa414501a7844ec04587efed4c10cd692e700fa780
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:068d082f32e81259395603675356a1debbab51aee10f85787a0679721344dfcf
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7f40779dea4b6e00c1e20018f59c11c5cbe1ad90972d2ca12df667176352bf8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dfbeacc2e776d52cf1b01ae6429ccbc09a726e11618310ffaa3029c92c97a0e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.08980054825597883,
5
  "eval_steps": 1000,
6
- "global_step": 475,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2857,13 +2857,163 @@
2857
  "learning_rate": 1.4112903225806454e-05,
2858
  "loss": 1.6849,
2859
  "step": 475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2860
  }
2861
  ],
2862
  "logging_steps": 1,
2863
  "max_steps": 501,
2864
  "num_train_epochs": 1,
2865
  "save_steps": 25,
2866
- "total_flos": 2.279886077804544e+16,
2867
  "trial_name": null,
2868
  "trial_params": null
2869
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.09452689290103035,
5
  "eval_steps": 1000,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2857
  "learning_rate": 1.4112903225806454e-05,
2858
  "loss": 1.6849,
2859
  "step": 475
2860
+ },
2861
+ {
2862
+ "epoch": 0.09,
2863
+ "learning_rate": 1.3709677419354839e-05,
2864
+ "loss": 1.9449,
2865
+ "step": 476
2866
+ },
2867
+ {
2868
+ "epoch": 0.09,
2869
+ "learning_rate": 1.3306451612903225e-05,
2870
+ "loss": 1.6432,
2871
+ "step": 477
2872
+ },
2873
+ {
2874
+ "epoch": 0.09,
2875
+ "learning_rate": 1.2903225806451613e-05,
2876
+ "loss": 1.725,
2877
+ "step": 478
2878
+ },
2879
+ {
2880
+ "epoch": 0.09,
2881
+ "learning_rate": 1.25e-05,
2882
+ "loss": 1.7214,
2883
+ "step": 479
2884
+ },
2885
+ {
2886
+ "epoch": 0.09,
2887
+ "learning_rate": 1.2096774193548388e-05,
2888
+ "loss": 1.9034,
2889
+ "step": 480
2890
+ },
2891
+ {
2892
+ "epoch": 0.09,
2893
+ "learning_rate": 1.1693548387096775e-05,
2894
+ "loss": 1.691,
2895
+ "step": 481
2896
+ },
2897
+ {
2898
+ "epoch": 0.09,
2899
+ "learning_rate": 1.129032258064516e-05,
2900
+ "loss": 2.1742,
2901
+ "step": 482
2902
+ },
2903
+ {
2904
+ "epoch": 0.09,
2905
+ "learning_rate": 1.0887096774193549e-05,
2906
+ "loss": 1.6739,
2907
+ "step": 483
2908
+ },
2909
+ {
2910
+ "epoch": 0.09,
2911
+ "learning_rate": 1.0483870967741936e-05,
2912
+ "loss": 1.7993,
2913
+ "step": 484
2914
+ },
2915
+ {
2916
+ "epoch": 0.09,
2917
+ "learning_rate": 1.0080645161290323e-05,
2918
+ "loss": 1.544,
2919
+ "step": 485
2920
+ },
2921
+ {
2922
+ "epoch": 0.09,
2923
+ "learning_rate": 9.67741935483871e-06,
2924
+ "loss": 1.8974,
2925
+ "step": 486
2926
+ },
2927
+ {
2928
+ "epoch": 0.09,
2929
+ "learning_rate": 9.274193548387097e-06,
2930
+ "loss": 1.774,
2931
+ "step": 487
2932
+ },
2933
+ {
2934
+ "epoch": 0.09,
2935
+ "learning_rate": 8.870967741935484e-06,
2936
+ "loss": 1.4694,
2937
+ "step": 488
2938
+ },
2939
+ {
2940
+ "epoch": 0.09,
2941
+ "learning_rate": 8.46774193548387e-06,
2942
+ "loss": 1.7621,
2943
+ "step": 489
2944
+ },
2945
+ {
2946
+ "epoch": 0.09,
2947
+ "learning_rate": 8.064516129032258e-06,
2948
+ "loss": 1.7785,
2949
+ "step": 490
2950
+ },
2951
+ {
2952
+ "epoch": 0.09,
2953
+ "learning_rate": 7.661290322580646e-06,
2954
+ "loss": 2.0437,
2955
+ "step": 491
2956
+ },
2957
+ {
2958
+ "epoch": 0.09,
2959
+ "learning_rate": 7.258064516129033e-06,
2960
+ "loss": 1.7494,
2961
+ "step": 492
2962
+ },
2963
+ {
2964
+ "epoch": 0.09,
2965
+ "learning_rate": 6.854838709677419e-06,
2966
+ "loss": 1.7531,
2967
+ "step": 493
2968
+ },
2969
+ {
2970
+ "epoch": 0.09,
2971
+ "learning_rate": 6.451612903225806e-06,
2972
+ "loss": 1.9762,
2973
+ "step": 494
2974
+ },
2975
+ {
2976
+ "epoch": 0.09,
2977
+ "learning_rate": 6.048387096774194e-06,
2978
+ "loss": 1.6643,
2979
+ "step": 495
2980
+ },
2981
+ {
2982
+ "epoch": 0.09,
2983
+ "learning_rate": 5.64516129032258e-06,
2984
+ "loss": 1.8211,
2985
+ "step": 496
2986
+ },
2987
+ {
2988
+ "epoch": 0.09,
2989
+ "learning_rate": 5.241935483870968e-06,
2990
+ "loss": 1.6926,
2991
+ "step": 497
2992
+ },
2993
+ {
2994
+ "epoch": 0.09,
2995
+ "learning_rate": 4.838709677419355e-06,
2996
+ "loss": 1.8867,
2997
+ "step": 498
2998
+ },
2999
+ {
3000
+ "epoch": 0.09,
3001
+ "learning_rate": 4.435483870967742e-06,
3002
+ "loss": 1.6068,
3003
+ "step": 499
3004
+ },
3005
+ {
3006
+ "epoch": 0.09,
3007
+ "learning_rate": 4.032258064516129e-06,
3008
+ "loss": 1.6741,
3009
+ "step": 500
3010
  }
3011
  ],
3012
  "logging_steps": 1,
3013
  "max_steps": 501,
3014
  "num_train_epochs": 1,
3015
  "save_steps": 25,
3016
+ "total_flos": 2.396005929934848e+16,
3017
  "trial_name": null,
3018
  "trial_params": null
3019
  }