Nexspear committed on
Commit
8f4c40a
·
verified ·
1 Parent(s): 1b244d3

Training in progress, step 35, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cdedf433deab50184f95cb1c4786410c886e89d09459b37b2d7d7906acd02ab
3
  size 191968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:389eed975d12b564fd874d90814d179fa92a08af2629545d4296c32813c23ddf
3
  size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82e615783e6c16bb60c6ecc44b6e6a0c6dafdf49ddb6096c0df9fffa2a154e64
3
  size 253144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7af6fbfa9e6decd5896a2bd0853612084b67590060f11f0648066d1a02c4fa2e
3
  size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d0440fe0e5910ed062e7da8e32dd6c2a310e10dc8b5186169b259a0a8b05db8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005d9a86e0834fe0802ab2e1c3fa9eca806e3cae26d1cbb14ab848a758219c13
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d63217b923cb177f669d6bc2174b89abdc6a56d968d279b505491b37976d9bb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88f387d8c434535a84694e469cebc18f2e722ba31b0dc0372632798b59011377
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.08771929824561403,
5
  "eval_steps": 5,
6
- "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -30,6 +30,124 @@
30
  "eval_samples_per_second": 573.519,
31
  "eval_steps_per_second": 71.69,
32
  "step": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
  ],
35
  "logging_steps": 3,
@@ -49,7 +167,7 @@
49
  "attributes": {}
50
  }
51
  },
52
- "total_flos": 557753303040.0,
53
  "train_batch_size": 8,
54
  "trial_name": null,
55
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6140350877192983,
5
  "eval_steps": 5,
6
+ "global_step": 35,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
30
  "eval_samples_per_second": 573.519,
31
  "eval_steps_per_second": 71.69,
32
  "step": 5
33
+ },
34
+ {
35
+ "epoch": 0.10526315789473684,
36
+ "grad_norm": 0.08514741063117981,
37
+ "learning_rate": 6e-05,
38
+ "loss": 10.3717,
39
+ "step": 6
40
+ },
41
+ {
42
+ "epoch": 0.15789473684210525,
43
+ "grad_norm": 0.08602377027273178,
44
+ "learning_rate": 9e-05,
45
+ "loss": 10.3709,
46
+ "step": 9
47
+ },
48
+ {
49
+ "epoch": 0.17543859649122806,
50
+ "eval_loss": 10.365401268005371,
51
+ "eval_runtime": 0.1659,
52
+ "eval_samples_per_second": 578.602,
53
+ "eval_steps_per_second": 72.325,
54
+ "step": 10
55
+ },
56
+ {
57
+ "epoch": 0.21052631578947367,
58
+ "grad_norm": 0.07277622818946838,
59
+ "learning_rate": 9.938441702975689e-05,
60
+ "loss": 10.3704,
61
+ "step": 12
62
+ },
63
+ {
64
+ "epoch": 0.2631578947368421,
65
+ "grad_norm": 0.07048846036195755,
66
+ "learning_rate": 9.619397662556435e-05,
67
+ "loss": 10.3696,
68
+ "step": 15
69
+ },
70
+ {
71
+ "epoch": 0.2631578947368421,
72
+ "eval_loss": 10.363677024841309,
73
+ "eval_runtime": 0.1716,
74
+ "eval_samples_per_second": 559.366,
75
+ "eval_steps_per_second": 69.921,
76
+ "step": 15
77
+ },
78
+ {
79
+ "epoch": 0.3157894736842105,
80
+ "grad_norm": 0.09568169713020325,
81
+ "learning_rate": 9.045084971874738e-05,
82
+ "loss": 10.3689,
83
+ "step": 18
84
+ },
85
+ {
86
+ "epoch": 0.3508771929824561,
87
+ "eval_loss": 10.36184310913086,
88
+ "eval_runtime": 0.1886,
89
+ "eval_samples_per_second": 508.996,
90
+ "eval_steps_per_second": 63.625,
91
+ "step": 20
92
+ },
93
+ {
94
+ "epoch": 0.3684210526315789,
95
+ "grad_norm": 0.08425775915384293,
96
+ "learning_rate": 8.247240241650918e-05,
97
+ "loss": 10.3657,
98
+ "step": 21
99
+ },
100
+ {
101
+ "epoch": 0.42105263157894735,
102
+ "grad_norm": 0.11433319002389908,
103
+ "learning_rate": 7.269952498697734e-05,
104
+ "loss": 10.3645,
105
+ "step": 24
106
+ },
107
+ {
108
+ "epoch": 0.43859649122807015,
109
+ "eval_loss": 10.35986328125,
110
+ "eval_runtime": 0.1662,
111
+ "eval_samples_per_second": 577.559,
112
+ "eval_steps_per_second": 72.195,
113
+ "step": 25
114
+ },
115
+ {
116
+ "epoch": 0.47368421052631576,
117
+ "grad_norm": 0.1078442707657814,
118
+ "learning_rate": 6.167226819279528e-05,
119
+ "loss": 10.3641,
120
+ "step": 27
121
+ },
122
+ {
123
+ "epoch": 0.5263157894736842,
124
+ "grad_norm": 0.11035740375518799,
125
+ "learning_rate": 5e-05,
126
+ "loss": 10.3617,
127
+ "step": 30
128
+ },
129
+ {
130
+ "epoch": 0.5263157894736842,
131
+ "eval_loss": 10.358070373535156,
132
+ "eval_runtime": 0.1694,
133
+ "eval_samples_per_second": 566.668,
134
+ "eval_steps_per_second": 70.833,
135
+ "step": 30
136
+ },
137
+ {
138
+ "epoch": 0.5789473684210527,
139
+ "grad_norm": 0.158222496509552,
140
+ "learning_rate": 3.832773180720475e-05,
141
+ "loss": 10.3614,
142
+ "step": 33
143
+ },
144
+ {
145
+ "epoch": 0.6140350877192983,
146
+ "eval_loss": 10.356663703918457,
147
+ "eval_runtime": 0.1699,
148
+ "eval_samples_per_second": 565.089,
149
+ "eval_steps_per_second": 70.636,
150
+ "step": 35
151
  }
152
  ],
153
  "logging_steps": 3,
 
167
  "attributes": {}
168
  }
169
  },
170
+ "total_flos": 3904273121280.0,
171
  "train_batch_size": 8,
172
  "trial_name": null,
173
  "trial_params": null