kooff11 commited on
Commit
2fa4251
·
verified ·
1 Parent(s): 0ea18bc

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f71ba216f075d892733de38cfea6a9f3d366c9d227043e562b05d0bd2c5cd4ba
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b96e79d3cfd6ab29e34e5c2d32f782014fc2618c8ceb530405224e78b832ac
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a1ef920d4b3dcf4279a301e863c4d0983babc7710262449bbd1b92253c12bbf
3
  size 37965300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:151e22c4835d1c1b38142522b6beb995bf5f0e0b5abba6f946f5413979fb18a9
3
  size 37965300
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:617810b977cc1e6ca73dc6bfc0370d79fcbc698e6f8bdb1947885c4502d9ece2
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d28f71a183f0a67a9d174bb4cd7b33b832d8562e06b5e3665e5e431d6d9ea01f
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfd15564f58ad647bdb087a89b4193296dea60904822986703543037d8ee1837
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e82224a25ddffa02df88f19ea586ab1c036794b31b4a630cfd08b6acc0a6cdb
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff2736979009751c0c6b0ddcc5f6544d6f723aa752b4798eab0b70fb76cf0083
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:532138a5ca880d8da393ae449e5715b2766def36b8838785ca08d07228b119b7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.01007028217769852,
5
  "eval_steps": 5,
6
- "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 24.565,
145
  "eval_steps_per_second": 6.142,
146
  "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 1,
@@ -158,12 +201,12 @@
158
  "should_evaluate": false,
159
  "should_log": false,
160
  "should_save": true,
161
- "should_training_stop": false
162
  },
163
  "attributes": {}
164
  }
165
  },
166
- "total_flos": 6.270090416750592e+16,
167
  "train_batch_size": 2,
168
  "trial_name": null,
169
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.013427042903598028,
5
  "eval_steps": 5,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 24.565,
145
  "eval_steps_per_second": 6.142,
146
  "step": 15
147
+ },
148
+ {
149
+ "epoch": 0.010741634322878422,
150
+ "grad_norm": 0.23266065120697021,
151
+ "learning_rate": 3.4549150281252636e-05,
152
+ "loss": 0.0488,
153
+ "step": 16
154
+ },
155
+ {
156
+ "epoch": 0.011412986468058323,
157
+ "grad_norm": 0.2226492166519165,
158
+ "learning_rate": 2.061073738537635e-05,
159
+ "loss": 0.0493,
160
+ "step": 17
161
+ },
162
+ {
163
+ "epoch": 0.012084338613238226,
164
+ "grad_norm": 0.23076535761356354,
165
+ "learning_rate": 9.549150281252633e-06,
166
+ "loss": 0.0403,
167
+ "step": 18
168
+ },
169
+ {
170
+ "epoch": 0.012755690758418127,
171
+ "grad_norm": 0.21546129882335663,
172
+ "learning_rate": 2.4471741852423237e-06,
173
+ "loss": 0.0362,
174
+ "step": 19
175
+ },
176
+ {
177
+ "epoch": 0.013427042903598028,
178
+ "grad_norm": 0.23565559089183807,
179
+ "learning_rate": 0.0,
180
+ "loss": 0.0457,
181
+ "step": 20
182
+ },
183
+ {
184
+ "epoch": 0.013427042903598028,
185
+ "eval_loss": 0.05111876502633095,
186
+ "eval_runtime": 408.1451,
187
+ "eval_samples_per_second": 24.587,
188
+ "eval_steps_per_second": 6.147,
189
+ "step": 20
190
  }
191
  ],
192
  "logging_steps": 1,
 
201
  "should_evaluate": false,
202
  "should_log": false,
203
  "should_save": true,
204
+ "should_training_stop": true
205
  },
206
  "attributes": {}
207
  }
208
  },
209
+ "total_flos": 8.360120555667456e+16,
210
  "train_batch_size": 2,
211
  "trial_name": null,
212
  "trial_params": null