nlparabic committed
Commit: 4b5d6d5
1 Parent(s): 4fd73ed

End of training

README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.6607
- - Bleu: 0.3968
- - Rouge1: 0.6232
- - Rouge2: 0.3746
- - Rougel: 0.6212
+ - Loss: 0.6246
+ - Bleu: 0.3877
+ - Rouge1: 0.5958
+ - Rouge2: 0.3370
+ - Rougel: 0.5935
 
 ## Model description
 
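For reference, a minimal usage sketch for the fine-tuned model follows. The repository id is an assumption inferred from the checkpoint path `/home/iais_marenpielka/Bouthaina/res_nw_dj` in trainer_state.json; the model card itself does not name the published repo.

```python
# Minimal sketch, assuming the model is published under the hypothetical repo id
# "nlparabic/res_nw_dj" and exposes a GPT-2-style causal LM head like its base model.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "nlparabic/res_nw_dj"  # hypothetical; replace with the actual repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

prompt = "مرحبا"  # any Arabic prompt
inputs = tokenizer(prompt, return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```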
all_results.json CHANGED
@@ -1,19 +1,19 @@
 {
- "epoch": 15.0,
- "eval_bleu": 0.2678870499231116,
- "eval_loss": 0.6265950798988342,
- "eval_rouge1": 0.5977012354572853,
- "eval_rouge2": 0.34430833134800065,
- "eval_rougeL": 0.5958973349618409,
- "eval_runtime": 17.8984,
- "eval_samples": 5405,
- "eval_samples_per_second": 301.982,
- "eval_steps_per_second": 37.769,
- "perplexity": 1.8712283369394682,
- "total_flos": 2.118621118464e+16,
- "train_loss": 0.058961041791913305,
- "train_runtime": 1668.9384,
- "train_samples": 21622,
- "train_samples_per_second": 259.111,
- "train_steps_per_second": 32.392
+ "epoch": 8.0,
+ "eval_bleu": 0.3877191285680082,
+ "eval_loss": 0.6245766282081604,
+ "eval_rouge1": 0.5957940125562868,
+ "eval_rouge2": 0.3370143004573494,
+ "eval_rougeL": 0.5934967085426222,
+ "eval_runtime": 17.5031,
+ "eval_samples": 5380,
+ "eval_samples_per_second": 307.375,
+ "eval_steps_per_second": 38.45,
+ "perplexity": 1.8674551622056355,
+ "total_flos": 1.1198453907456e+16,
+ "train_loss": 0.5107668242644979,
+ "train_runtime": 1728.8387,
+ "train_samples": 21429,
+ "train_samples_per_second": 247.901,
+ "train_steps_per_second": 30.992
 }
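The derived fields in all_results.json are consistent with the raw ones: perplexity is exp(eval_loss) and eval_samples_per_second is eval_samples / eval_runtime. A quick check with the new values above:

```python
# Consistency check for the derived metrics above (values copied from the updated all_results.json).
import math

eval_loss = 0.6245766282081604
eval_runtime = 17.5031
eval_samples = 5380

print(math.exp(eval_loss))          # ~1.8675, matches "perplexity"
print(eval_samples / eval_runtime)  # ~307.4, matches "eval_samples_per_second"
```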
egy_training_log.txt CHANGED
@@ -156,3 +156,5 @@ INFO:root:Epoch 7.0: Train Loss = 0.328, Eval Loss = 0.6383510231971741
 INFO:absl:Using default tokenizer.
 INFO:root:Epoch 8.0: Train Loss = 0.2832, Eval Loss = 0.6490957736968994
 INFO:absl:Using default tokenizer.
+ INFO:__main__:*** Evaluate ***
+ INFO:absl:Using default tokenizer.
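The per-epoch lines in egy_training_log.txt follow a fixed `Epoch N: Train Loss = X, Eval Loss = Y` format, so the loss history can be recovered with a small regex if needed; a sketch, assuming the log file sits in the working directory:

```python
# Sketch: extract per-epoch train/eval losses from egy_training_log.txt,
# whose lines follow "INFO:root:Epoch N.0: Train Loss = X, Eval Loss = Y".
import re

pattern = re.compile(
    r"Epoch (?P<epoch>[\d.]+): Train Loss = (?P<train>[\d.]+), Eval Loss = (?P<eval>[\d.]+)"
)

with open("egy_training_log.txt", encoding="utf-8") as fh:
    for line in fh:
        match = pattern.search(line)
        if match:
            print(float(match["epoch"]), float(match["train"]), float(match["eval"]))
```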
eval_results.json CHANGED
@@ -1,13 +1,13 @@
 {
- "epoch": 15.0,
- "eval_bleu": 0.2678870499231116,
- "eval_loss": 0.6265950798988342,
- "eval_rouge1": 0.5977012354572853,
- "eval_rouge2": 0.34430833134800065,
- "eval_rougeL": 0.5958973349618409,
- "eval_runtime": 17.8984,
- "eval_samples": 5405,
- "eval_samples_per_second": 301.982,
- "eval_steps_per_second": 37.769,
- "perplexity": 1.8712283369394682
+ "epoch": 8.0,
+ "eval_bleu": 0.3877191285680082,
+ "eval_loss": 0.6245766282081604,
+ "eval_rouge1": 0.5957940125562868,
+ "eval_rouge2": 0.3370143004573494,
+ "eval_rougeL": 0.5934967085426222,
+ "eval_runtime": 17.5031,
+ "eval_samples": 5380,
+ "eval_samples_per_second": 307.375,
+ "eval_steps_per_second": 38.45,
+ "perplexity": 1.8674551622056355
 }
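The eval_bleu and eval_rouge* fields are corpus-level generation metrics; the `INFO:absl:Using default tokenizer.` lines in egy_training_log.txt are what the rouge_score backend prints. A sketch of how such scores are typically computed with the `evaluate` library is below; the actual predictions, references, and generation settings behind the numbers above are not part of this commit, so this is illustrative only.

```python
# Illustrative only: computing BLEU/ROUGE with the Hugging Face `evaluate` library.
# The placeholder predictions/references below are not the model's evaluation data.
import evaluate

predictions = ["generated sentence one", "generated sentence two"]  # placeholders
references = ["reference sentence one", "reference sentence two"]   # placeholders

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

print(rouge.compute(predictions=predictions, references=references))                # rouge1, rouge2, rougeL, ...
print(bleu.compute(predictions=predictions, references=[[r] for r in references]))  # "bleu"
```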
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
- "epoch": 15.0,
- "total_flos": 2.118621118464e+16,
- "train_loss": 0.058961041791913305,
- "train_runtime": 1668.9384,
- "train_samples": 21622,
- "train_samples_per_second": 259.111,
- "train_steps_per_second": 32.392
+ "epoch": 8.0,
+ "total_flos": 1.1198453907456e+16,
+ "train_loss": 0.5107668242644979,
+ "train_runtime": 1728.8387,
+ "train_samples": 21429,
+ "train_samples_per_second": 247.901,
+ "train_steps_per_second": 30.992
 }
train_vs_val_loss.png CHANGED
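train_vs_val_loss.png is a binary change the diff cannot render. A plot like it can be regenerated from the log_history entries in trainer_state.json below; a minimal sketch, assuming the file sits in the working directory:

```python
# Sketch: rebuild a train-vs-validation loss curve from trainer_state.json
# (the per-epoch "loss" and "eval_loss" entries in the log_history shown below).
import json
import matplotlib.pyplot as plt

with open("trainer_state.json", encoding="utf-8") as fh:
    state = json.load(fh)

train = [(e["epoch"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["epoch"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

plt.plot(*zip(*train), marker="o", label="train loss")
plt.plot(*zip(*evals), marker="o", label="eval loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.savefig("train_vs_val_loss.png")
```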
trainer_state.json CHANGED
@@ -1,310 +1,177 @@
 {
- "best_metric": 0.6265950798988342,
- "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_dj/checkpoint-13515",
- "epoch": 15.0,
+ "best_metric": 0.6245766282081604,
+ "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_dj/checkpoint-8037",
+ "epoch": 8.0,
 "eval_steps": 500,
- "global_step": 40545,
+ "global_step": 21432,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "log_history": [
 {
 "epoch": 1.0,
- "grad_norm": 1.3912957906723022,
- "learning_rate": 3.552372190826653e-05,
- "loss": 1.2513,
- "step": 2703
+ "grad_norm": 1.1291303634643555,
+ "learning_rate": 4.7947437829691034e-05,
+ "loss": 1.2336,
+ "step": 2679
 },
 {
 "epoch": 1.0,
- "eval_bleu": 0.22005527068442493,
- "eval_loss": 0.7111806869506836,
- "eval_rouge1": 0.5171822713118965,
- "eval_rouge2": 0.253317924025756,
- "eval_rougeL": 0.5148202154962768,
- "eval_runtime": 21.8652,
- "eval_samples_per_second": 247.197,
- "eval_steps_per_second": 30.917,
- "step": 2703
+ "eval_bleu": 0.3526371485696729,
+ "eval_loss": 0.7061845660209656,
+ "eval_rouge1": 0.5198447201202445,
+ "eval_rouge2": 0.25470674188424197,
+ "eval_rougeL": 0.516986428368343,
+ "eval_runtime": 17.6993,
+ "eval_samples_per_second": 303.967,
+ "eval_steps_per_second": 38.024,
+ "step": 2679
 },
 {
 "epoch": 2.0,
- "grad_norm": 1.4323362112045288,
- "learning_rate": 1.7761860954133264e-05,
- "loss": 0.6462,
- "step": 5406
+ "grad_norm": 1.4575163125991821,
+ "learning_rate": 4.542388847023361e-05,
+ "loss": 0.634,
+ "step": 5358
 },
 {
 "epoch": 2.0,
- "eval_bleu": 0.24508343939723337,
- "eval_loss": 0.6569304466247559,
- "eval_rouge1": 0.557883823201639,
- "eval_rouge2": 0.2973836473978787,
- "eval_rougeL": 0.5553146565104614,
- "eval_runtime": 150.8243,
- "eval_samples_per_second": 35.836,
- "eval_steps_per_second": 4.482,
- "step": 5406
+ "eval_bleu": 0.37564617349393215,
+ "eval_loss": 0.6422649025917053,
+ "eval_rouge1": 0.5739420026060218,
+ "eval_rouge2": 0.3114152854574803,
+ "eval_rougeL": 0.571389088656927,
+ "eval_runtime": 17.5523,
+ "eval_samples_per_second": 306.513,
+ "eval_steps_per_second": 38.343,
+ "step": 5358
 },
 {
 "epoch": 3.0,
- "grad_norm": 1.2555302381515503,
- "learning_rate": 0.0,
- "loss": 0.5673,
- "step": 8109
+ "grad_norm": 1.237191915512085,
+ "learning_rate": 4.290033911077619e-05,
+ "loss": 0.5299,
+ "step": 8037
 },
 {
 "epoch": 3.0,
- "eval_bleu": 0.2518224574721449,
- "eval_loss": 0.6498554348945618,
- "eval_rouge1": 0.57015304568835,
- "eval_rouge2": 0.3118394078609683,
- "eval_rougeL": 0.5676740384593599,
- "eval_runtime": 172.9357,
- "eval_samples_per_second": 31.254,
- "eval_steps_per_second": 3.909,
- "step": 8109
+ "eval_bleu": 0.3877191285680082,
+ "eval_loss": 0.6245766282081604,
+ "eval_rouge1": 0.5957940125562868,
+ "eval_rouge2": 0.3370143004573494,
+ "eval_rougeL": 0.5934967085426222,
+ "eval_runtime": 17.5587,
+ "eval_samples_per_second": 306.401,
+ "eval_steps_per_second": 38.329,
+ "step": 8037
 },
 {
 "epoch": 4.0,
- "grad_norm": 1.4183156490325928,
- "learning_rate": 4.037341299477222e-05,
- "loss": 0.556,
- "step": 10812
+ "grad_norm": 1.5628466606140137,
+ "learning_rate": 4.0376789751318766e-05,
+ "loss": 0.4492,
+ "step": 10716
 },
 {
 "epoch": 4.0,
- "eval_bleu": 0.25983253831381714,
- "eval_loss": 0.63295978307724,
- "eval_rouge1": 0.5829310998998971,
- "eval_rouge2": 0.326193735265751,
- "eval_rougeL": 0.5807349807527458,
- "eval_runtime": 40.5509,
- "eval_samples_per_second": 133.289,
- "eval_steps_per_second": 16.67,
- "step": 10812
+ "eval_bleu": 0.39048560865149107,
+ "eval_loss": 0.6246171593666077,
+ "eval_rouge1": 0.6081397458304423,
+ "eval_rouge2": 0.3525706786064172,
+ "eval_rougeL": 0.6056655214414464,
+ "eval_runtime": 17.6128,
+ "eval_samples_per_second": 305.459,
+ "eval_steps_per_second": 38.211,
+ "step": 10716
 },
 {
 "epoch": 5.0,
- "grad_norm": 1.186664342880249,
- "learning_rate": 3.785007468259896e-05,
- "loss": 0.4786,
- "step": 13515
+ "grad_norm": 1.8470176458358765,
+ "learning_rate": 3.785324039186134e-05,
+ "loss": 0.3829,
+ "step": 13395
 },
 {
 "epoch": 5.0,
- "eval_bleu": 0.2678870499231116,
- "eval_loss": 0.6265950798988342,
- "eval_rouge1": 0.5977012354572853,
- "eval_rouge2": 0.34430833134800065,
- "eval_rougeL": 0.5958973349618409,
- "eval_runtime": 36.0053,
- "eval_samples_per_second": 150.117,
- "eval_steps_per_second": 18.775,
- "step": 13515
+ "eval_bleu": 0.3963490407851369,
+ "eval_loss": 0.6300457715988159,
+ "eval_rouge1": 0.6145424292978614,
+ "eval_rouge2": 0.3620910031723723,
+ "eval_rougeL": 0.6124537005851034,
+ "eval_runtime": 17.4259,
+ "eval_samples_per_second": 308.735,
+ "eval_steps_per_second": 38.621,
+ "step": 13395
 },
 {
 "epoch": 6.0,
- "grad_norm": 1.1597645282745361,
- "learning_rate": 3.5326736370425696e-05,
- "loss": 0.4123,
- "step": 16218
+ "grad_norm": 1.5765687227249146,
+ "learning_rate": 3.532969103240392e-05,
+ "loss": 0.328,
+ "step": 16074
 },
 {
 "epoch": 6.0,
- "eval_bleu": 0.2707063976336222,
- "eval_loss": 0.630312979221344,
- "eval_rouge1": 0.604436172840783,
- "eval_rouge2": 0.35482389915380186,
- "eval_rougeL": 0.6026792061487433,
- "eval_runtime": 161.1219,
- "eval_samples_per_second": 33.546,
- "eval_steps_per_second": 4.196,
- "step": 16218
+ "eval_bleu": 0.3960700684284105,
+ "eval_loss": 0.6383510231971741,
+ "eval_rouge1": 0.6213431858539703,
+ "eval_rouge2": 0.36997070576967905,
+ "eval_rougeL": 0.6194073222098655,
+ "eval_runtime": 17.7673,
+ "eval_samples_per_second": 302.803,
+ "eval_steps_per_second": 37.879,
+ "step": 16074
 },
 {
 "epoch": 7.0,
- "grad_norm": 1.27555251121521,
- "learning_rate": 3.280339805825243e-05,
- "loss": 0.3573,
- "step": 18921
+ "grad_norm": 1.960555076599121,
+ "learning_rate": 3.28061416729465e-05,
+ "loss": 0.2832,
+ "step": 18753
 },
 {
 "epoch": 7.0,
- "eval_bleu": 0.27393786087649125,
- "eval_loss": 0.6372247338294983,
- "eval_rouge1": 0.6108088318784769,
- "eval_rouge2": 0.36307215008060617,
- "eval_rougeL": 0.6088126794948809,
- "eval_runtime": 159.7397,
- "eval_samples_per_second": 33.836,
- "eval_steps_per_second": 4.232,
- "step": 18921
+ "eval_bleu": 0.3998604582987598,
+ "eval_loss": 0.6490957736968994,
+ "eval_rouge1": 0.623202150680765,
+ "eval_rouge2": 0.3741196322885935,
+ "eval_rougeL": 0.6209434708040459,
+ "eval_runtime": 17.3486,
+ "eval_samples_per_second": 310.112,
+ "eval_steps_per_second": 38.793,
+ "step": 18753
 },
 {
 "epoch": 8.0,
- "grad_norm": 1.6310491561889648,
- "learning_rate": 3.0280059746079166e-05,
- "loss": 0.3108,
- "step": 21624
+ "grad_norm": 1.4162492752075195,
+ "learning_rate": 3.028259231348907e-05,
+ "loss": 0.2453,
+ "step": 21432
 },
 {
 "epoch": 8.0,
- "eval_bleu": 0.2759919170184585,
- "eval_loss": 0.646577000617981,
- "eval_rouge1": 0.6130373826214599,
- "eval_rouge2": 0.36776670773707465,
- "eval_rougeL": 0.6111699095881062,
- "eval_runtime": 150.1086,
- "eval_samples_per_second": 36.007,
- "eval_steps_per_second": 4.503,
- "step": 21624
+ "eval_bleu": 0.3967586062831279,
+ "eval_loss": 0.660749614238739,
+ "eval_rouge1": 0.6231513583077875,
+ "eval_rouge2": 0.37461482860421447,
+ "eval_rougeL": 0.6211860899632824,
+ "eval_runtime": 17.8295,
+ "eval_samples_per_second": 301.747,
+ "eval_steps_per_second": 37.746,
+ "step": 21432
 },
 {
- "epoch": 9.0,
- "grad_norm": 1.6629250049591064,
- "learning_rate": 2.77567214339059e-05,
- "loss": 0.2719,
- "step": 24327
- },
- {
- "epoch": 9.0,
- "eval_bleu": 0.2801810856751608,
- "eval_loss": 0.6550981998443604,
- "eval_rouge1": 0.6177880665801061,
- "eval_rouge2": 0.3727206564119486,
- "eval_rougeL": 0.6159303776582042,
- "eval_runtime": 159.1145,
- "eval_samples_per_second": 33.969,
- "eval_steps_per_second": 4.249,
- "step": 24327
- },
- {
- "epoch": 10.0,
- "grad_norm": 1.5184797048568726,
- "learning_rate": 2.523338312173264e-05,
- "loss": 0.2396,
- "step": 27030
- },
- {
- "epoch": 10.0,
- "eval_bleu": 0.27697504925957206,
- "eval_loss": 0.665830671787262,
- "eval_rouge1": 0.6173182845826588,
- "eval_rouge2": 0.37288754745117264,
- "eval_rougeL": 0.6152835100872982,
- "eval_runtime": 110.2964,
- "eval_samples_per_second": 49.004,
- "eval_steps_per_second": 6.129,
- "step": 27030
- },
- {
- "epoch": 11.0,
- "grad_norm": 1.3458495140075684,
- "learning_rate": 2.2710044809559374e-05,
- "loss": 0.2129,
- "step": 29733
- },
- {
- "epoch": 11.0,
- "eval_bleu": 0.27843197191410246,
- "eval_loss": 0.6767598390579224,
- "eval_rouge1": 0.6188286228800556,
- "eval_rouge2": 0.3762109244532287,
- "eval_rougeL": 0.617052574223907,
- "eval_runtime": 24.0534,
- "eval_samples_per_second": 224.709,
- "eval_steps_per_second": 28.104,
- "step": 29733
- },
- {
- "epoch": 12.0,
- "grad_norm": 1.636675477027893,
- "learning_rate": 2.018670649738611e-05,
- "loss": 0.191,
- "step": 32436
- },
- {
- "epoch": 12.0,
- "eval_bleu": 0.2780108798800892,
- "eval_loss": 0.6870447993278503,
- "eval_rouge1": 0.6208121821010748,
- "eval_rouge2": 0.37810190638421814,
- "eval_rougeL": 0.6189436880506437,
- "eval_runtime": 42.9158,
- "eval_samples_per_second": 125.944,
- "eval_steps_per_second": 15.752,
- "step": 32436
- },
- {
- "epoch": 13.0,
- "grad_norm": 1.3978990316390991,
- "learning_rate": 1.7663368185212848e-05,
- "loss": 0.1733,
- "step": 35139
- },
- {
- "epoch": 13.0,
- "eval_bleu": 0.2799527424248887,
- "eval_loss": 0.696670651435852,
- "eval_rouge1": 0.6213835516562576,
- "eval_rouge2": 0.3799254363900967,
- "eval_rougeL": 0.6195026516671875,
- "eval_runtime": 17.6056,
- "eval_samples_per_second": 307.005,
- "eval_steps_per_second": 38.397,
- "step": 35139
- },
- {
- "epoch": 14.0,
- "grad_norm": 1.386664628982544,
- "learning_rate": 1.5140029873039583e-05,
- "loss": 0.1593,
- "step": 37842
- },
- {
- "epoch": 14.0,
- "eval_bleu": 0.2790160057234741,
- "eval_loss": 0.7063180208206177,
- "eval_rouge1": 0.6214879203521921,
- "eval_rouge2": 0.379862056883408,
- "eval_rougeL": 0.6194802915698101,
- "eval_runtime": 17.8263,
- "eval_samples_per_second": 303.204,
- "eval_steps_per_second": 37.921,
- "step": 37842
- },
- {
- "epoch": 15.0,
- "grad_norm": 1.5141432285308838,
- "learning_rate": 1.261669156086632e-05,
- "loss": 0.1478,
- "step": 40545
- },
- {
- "epoch": 15.0,
- "eval_bleu": 0.2808668290315019,
- "eval_loss": 0.7138631939888,
- "eval_rouge1": 0.6216459883291815,
- "eval_rouge2": 0.3804311054098596,
- "eval_rougeL": 0.6196983257570402,
- "eval_runtime": 17.6502,
- "eval_samples_per_second": 306.229,
- "eval_steps_per_second": 38.3,
- "step": 40545
- },
- {
- "epoch": 15.0,
- "step": 40545,
- "total_flos": 2.118621118464e+16,
- "train_loss": 0.058961041791913305,
- "train_runtime": 1668.9384,
- "train_samples_per_second": 259.111,
- "train_steps_per_second": 32.392
+ "epoch": 8.0,
+ "step": 21432,
+ "total_flos": 1.1198453907456e+16,
+ "train_loss": 0.5107668242644979,
+ "train_runtime": 1728.8387,
+ "train_samples_per_second": 247.901,
+ "train_steps_per_second": 30.992
 }
 ],
 "logging_steps": 500,
- "max_steps": 54060,
+ "max_steps": 53580,
 "num_input_tokens_seen": 0,
 "num_train_epochs": 20,
 "save_steps": 500,
@@ -329,7 +196,7 @@
 "attributes": {}
 }
 },
- "total_flos": 2.118621118464e+16,
+ "total_flos": 1.1198453907456e+16,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null