nlparabic committed on
Commit
7e84a34
1 Parent(s): 5cf7475

End of training

Browse files
README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.5049
22
- - Bleu: 0.2594
23
- - Rouge1: 0.6161
24
- - Rouge2: 0.3829
25
- - Rougel: 0.6125
26
 
27
  ## Model description
28
 
 
18
 
19
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.4720
22
+ - Bleu: 0.2297
23
+ - Rouge1: 0.5777
24
+ - Rouge2: 0.3341
25
+ - Rougel: 0.5758
26
 
27
  ## Model description
28
 
all_results.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
- "epoch": 11.0,
3
  "eval_bleu": 0.22970619705356748,
4
  "eval_loss": 0.47202983498573303,
5
  "eval_rouge1": 0.5777164933812552,
6
  "eval_rouge2": 0.33405816844574837,
7
  "eval_rougeL": 0.5758449342460217,
8
- "eval_runtime": 1.2669,
9
  "eval_samples": 304,
10
- "eval_samples_per_second": 239.949,
11
- "eval_steps_per_second": 29.994,
12
  "perplexity": 1.603245215811176,
13
- "total_flos": 876634767360000.0,
14
- "train_loss": 0.9867972013005457,
15
- "train_runtime": 2886.3238,
16
  "train_samples": 1220,
17
- "train_samples_per_second": 8.454,
18
- "train_steps_per_second": 1.06
19
  }
 
1
  {
2
+ "epoch": 16.0,
3
  "eval_bleu": 0.22970619705356748,
4
  "eval_loss": 0.47202983498573303,
5
  "eval_rouge1": 0.5777164933812552,
6
  "eval_rouge2": 0.33405816844574837,
7
  "eval_rougeL": 0.5758449342460217,
8
+ "eval_runtime": 3.3676,
9
  "eval_samples": 304,
10
+ "eval_samples_per_second": 90.273,
11
+ "eval_steps_per_second": 11.284,
12
  "perplexity": 1.603245215811176,
13
+ "total_flos": 1275105116160000.0,
14
+ "train_loss": 0.024901744976542354,
15
+ "train_runtime": 209.6658,
16
  "train_samples": 1220,
17
+ "train_samples_per_second": 116.376,
18
+ "train_steps_per_second": 14.595
19
  }
egy_training_log.txt CHANGED
@@ -325,3 +325,5 @@ INFO:root:Epoch 15.0: Train Loss = 0.0787, Eval Loss = 0.5004830360412598
325
  INFO:absl:Using default tokenizer.
326
  INFO:root:Epoch 16.0: Train Loss = 0.0757, Eval Loss = 0.5019999146461487
327
  INFO:absl:Using default tokenizer.
 
 
 
325
  INFO:absl:Using default tokenizer.
326
  INFO:root:Epoch 16.0: Train Loss = 0.0757, Eval Loss = 0.5019999146461487
327
  INFO:absl:Using default tokenizer.
328
+ INFO:__main__:*** Evaluate ***
329
+ INFO:absl:Using default tokenizer.
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 11.0,
3
  "eval_bleu": 0.22970619705356748,
4
  "eval_loss": 0.47202983498573303,
5
  "eval_rouge1": 0.5777164933812552,
6
  "eval_rouge2": 0.33405816844574837,
7
  "eval_rougeL": 0.5758449342460217,
8
- "eval_runtime": 1.2669,
9
  "eval_samples": 304,
10
- "eval_samples_per_second": 239.949,
11
- "eval_steps_per_second": 29.994,
12
  "perplexity": 1.603245215811176
13
  }
 
1
  {
2
+ "epoch": 16.0,
3
  "eval_bleu": 0.22970619705356748,
4
  "eval_loss": 0.47202983498573303,
5
  "eval_rouge1": 0.5777164933812552,
6
  "eval_rouge2": 0.33405816844574837,
7
  "eval_rougeL": 0.5758449342460217,
8
+ "eval_runtime": 3.3676,
9
  "eval_samples": 304,
10
+ "eval_samples_per_second": 90.273,
11
+ "eval_steps_per_second": 11.284,
12
  "perplexity": 1.603245215811176
13
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 11.0,
3
- "total_flos": 876634767360000.0,
4
- "train_loss": 0.9867972013005457,
5
- "train_runtime": 2886.3238,
6
  "train_samples": 1220,
7
- "train_samples_per_second": 8.454,
8
- "train_steps_per_second": 1.06
9
  }
 
1
  {
2
+ "epoch": 16.0,
3
+ "total_flos": 1275105116160000.0,
4
+ "train_loss": 0.024901744976542354,
5
+ "train_runtime": 209.6658,
6
  "train_samples": 1220,
7
+ "train_samples_per_second": 116.376,
8
+ "train_steps_per_second": 14.595
9
  }
train_vs_val_loss.png CHANGED
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.47202983498573303,
3
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem/checkpoint-918",
4
- "epoch": 11.0,
5
  "eval_steps": 500,
6
- "global_step": 1683,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -218,13 +218,108 @@
218
  "step": 1683
219
  },
220
  {
221
- "epoch": 11.0,
222
- "step": 1683,
223
- "total_flos": 876634767360000.0,
224
- "train_loss": 0.9867972013005457,
225
- "train_runtime": 2886.3238,
226
- "train_samples_per_second": 8.454,
227
- "train_steps_per_second": 1.06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  }
229
  ],
230
  "logging_steps": 500,
@@ -253,7 +348,7 @@
253
  "attributes": {}
254
  }
255
  },
256
- "total_flos": 876634767360000.0,
257
  "train_batch_size": 8,
258
  "trial_name": null,
259
  "trial_params": null
 
1
  {
2
  "best_metric": 0.47202983498573303,
3
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem/checkpoint-918",
4
+ "epoch": 16.0,
5
  "eval_steps": 500,
6
+ "global_step": 2448,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
218
  "step": 1683
219
  },
220
  {
221
+ "epoch": 12.0,
222
+ "grad_norm": 0.6220578551292419,
223
+ "learning_rate": 2.3906250000000002e-05,
224
+ "loss": 0.0875,
225
+ "step": 1836
226
+ },
227
+ {
228
+ "epoch": 12.0,
229
+ "eval_bleu": 0.25506926105311084,
230
+ "eval_loss": 0.4913596212863922,
231
+ "eval_rouge1": 0.6097696683692526,
232
+ "eval_rouge2": 0.38019766139150524,
233
+ "eval_rougeL": 0.6055686891522601,
234
+ "eval_runtime": 2.0886,
235
+ "eval_samples_per_second": 145.549,
236
+ "eval_steps_per_second": 18.194,
237
+ "step": 1836
238
+ },
239
+ {
240
+ "epoch": 13.0,
241
+ "grad_norm": 0.7318098545074463,
242
+ "learning_rate": 2.091796875e-05,
243
+ "loss": 0.0825,
244
+ "step": 1989
245
+ },
246
+ {
247
+ "epoch": 13.0,
248
+ "eval_bleu": 0.26272086017465046,
249
+ "eval_loss": 0.4981193542480469,
250
+ "eval_rouge1": 0.6144877321856528,
251
+ "eval_rouge2": 0.3872195934378641,
252
+ "eval_rougeL": 0.6108120100650027,
253
+ "eval_runtime": 1.1422,
254
+ "eval_samples_per_second": 266.148,
255
+ "eval_steps_per_second": 33.268,
256
+ "step": 1989
257
+ },
258
+ {
259
+ "epoch": 14.0,
260
+ "grad_norm": 0.794402539730072,
261
+ "learning_rate": 1.79296875e-05,
262
+ "loss": 0.0787,
263
+ "step": 2142
264
+ },
265
+ {
266
+ "epoch": 14.0,
267
+ "eval_bleu": 0.25895474712357536,
268
+ "eval_loss": 0.5004830360412598,
269
+ "eval_rouge1": 0.6138305476317043,
270
+ "eval_rouge2": 0.3796550798534031,
271
+ "eval_rougeL": 0.6100541920692137,
272
+ "eval_runtime": 1.1033,
273
+ "eval_samples_per_second": 275.529,
274
+ "eval_steps_per_second": 34.441,
275
+ "step": 2142
276
+ },
277
+ {
278
+ "epoch": 15.0,
279
+ "grad_norm": 0.7674385905265808,
280
+ "learning_rate": 1.4941406250000001e-05,
281
+ "loss": 0.0757,
282
+ "step": 2295
283
+ },
284
+ {
285
+ "epoch": 15.0,
286
+ "eval_bleu": 0.2632177488955002,
287
+ "eval_loss": 0.5019999146461487,
288
+ "eval_rouge1": 0.6170590027553351,
289
+ "eval_rouge2": 0.3850023586523127,
290
+ "eval_rougeL": 0.6140574753716048,
291
+ "eval_runtime": 4.8266,
292
+ "eval_samples_per_second": 62.985,
293
+ "eval_steps_per_second": 7.873,
294
+ "step": 2295
295
+ },
296
+ {
297
+ "epoch": 16.0,
298
+ "grad_norm": 0.7136771082878113,
299
+ "learning_rate": 1.1953125000000001e-05,
300
+ "loss": 0.074,
301
+ "step": 2448
302
+ },
303
+ {
304
+ "epoch": 16.0,
305
+ "eval_bleu": 0.25943804422988964,
306
+ "eval_loss": 0.5049206018447876,
307
+ "eval_rouge1": 0.6161012779292924,
308
+ "eval_rouge2": 0.3828890396119353,
309
+ "eval_rougeL": 0.6124852816095869,
310
+ "eval_runtime": 1.0975,
311
+ "eval_samples_per_second": 276.998,
312
+ "eval_steps_per_second": 34.625,
313
+ "step": 2448
314
+ },
315
+ {
316
+ "epoch": 16.0,
317
+ "step": 2448,
318
+ "total_flos": 1275105116160000.0,
319
+ "train_loss": 0.024901744976542354,
320
+ "train_runtime": 209.6658,
321
+ "train_samples_per_second": 116.376,
322
+ "train_steps_per_second": 14.595
323
  }
324
  ],
325
  "logging_steps": 500,
 
348
  "attributes": {}
349
  }
350
  },
351
+ "total_flos": 1275105116160000.0,
352
  "train_batch_size": 8,
353
  "trial_name": null,
354
  "trial_params": null