nlparabic committed on
Commit
b4cca2d
1 Parent(s): 35b3bdc

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -0
  2. all_results.json +12 -12
  3. eval_results.json +7 -7
  4. train_results.json +6 -6
  5. trainer_state.json +12 -54
README.md CHANGED
@@ -3,6 +3,8 @@ license: apache-2.0
3
  base_model: riotu-lab/ArabianGPT-01B
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: test-aragpt
8
  results: []
@@ -14,6 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
14
  # test-aragpt
15
 
16
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
 
 
 
17
 
18
  ## Model description
19
 
 
3
  base_model: riotu-lab/ArabianGPT-01B
4
  tags:
5
  - generated_from_trainer
6
+ metrics:
7
+ - bleu
8
  model-index:
9
  - name: test-aragpt
10
  results: []
 
16
  # test-aragpt
17
 
18
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 3.3262
21
+ - Bleu: 0.1060
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.5484330769384699,
4
- "eval_loss": 2.3788321018218994,
5
- "eval_runtime": 137.4021,
6
  "eval_samples": 847,
7
- "eval_samples_per_second": 6.164,
8
- "eval_steps_per_second": 0.771,
9
- "perplexity": 10.792291203563014,
10
- "total_flos": 1.000225898496e+16,
11
- "train_loss": 2.539044189453125,
12
- "train_runtime": 1176.1508,
13
  "train_samples": 2552,
14
- "train_samples_per_second": 21.698,
15
- "train_steps_per_second": 2.712
16
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_bleu": 0.10603800831832183,
4
+ "eval_loss": 3.326219081878662,
5
+ "eval_runtime": 18.595,
6
  "eval_samples": 847,
7
+ "eval_samples_per_second": 45.55,
8
+ "eval_steps_per_second": 5.7,
9
+ "perplexity": 27.832908563445223,
10
+ "total_flos": 1000225898496000.0,
11
+ "train_loss": 3.8128880228742164,
12
+ "train_runtime": 119.3474,
13
  "train_samples": 2552,
14
+ "train_samples_per_second": 21.383,
15
+ "train_steps_per_second": 2.673
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.5484330769384699,
4
- "eval_loss": 2.3788321018218994,
5
- "eval_runtime": 137.4021,
6
  "eval_samples": 847,
7
- "eval_samples_per_second": 6.164,
8
- "eval_steps_per_second": 0.771,
9
- "perplexity": 10.792291203563014
10
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_bleu": 0.10603800831832183,
4
+ "eval_loss": 3.326219081878662,
5
+ "eval_runtime": 18.595,
6
  "eval_samples": 847,
7
+ "eval_samples_per_second": 45.55,
8
+ "eval_steps_per_second": 5.7,
9
+ "perplexity": 27.832908563445223
10
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 1.000225898496e+16,
4
- "train_loss": 2.539044189453125,
5
- "train_runtime": 1176.1508,
6
  "train_samples": 2552,
7
- "train_samples_per_second": 21.698,
8
- "train_steps_per_second": 2.712
9
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "total_flos": 1000225898496000.0,
4
+ "train_loss": 3.8128880228742164,
5
+ "train_runtime": 119.3474,
6
  "train_samples": 2552,
7
+ "train_samples_per_second": 21.383,
8
+ "train_steps_per_second": 2.673
9
  }
trainer_state.json CHANGED
@@ -1,69 +1,27 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 3190,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.567398119122257,
13
- "grad_norm": 1.1572401523590088,
14
- "learning_rate": 4.2163009404388715e-05,
15
- "loss": 3.463,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 3.134796238244514,
20
- "grad_norm": 1.1287379264831543,
21
- "learning_rate": 3.4326018808777435e-05,
22
- "loss": 2.7518,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 4.702194357366771,
27
- "grad_norm": 1.1390870809555054,
28
- "learning_rate": 2.6489028213166144e-05,
29
- "loss": 2.479,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 6.269592476489028,
34
- "grad_norm": 1.1493933200836182,
35
- "learning_rate": 1.865203761755486e-05,
36
- "loss": 2.3191,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 7.836990595611285,
41
- "grad_norm": 1.1453299522399902,
42
- "learning_rate": 1.0815047021943574e-05,
43
- "loss": 2.2187,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 9.404388714733543,
48
- "grad_norm": 1.1752275228500366,
49
- "learning_rate": 2.9780564263322885e-06,
50
- "loss": 2.1545,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 10.0,
55
- "step": 3190,
56
- "total_flos": 1.000225898496e+16,
57
- "train_loss": 2.539044189453125,
58
- "train_runtime": 1176.1508,
59
- "train_samples_per_second": 21.698,
60
- "train_steps_per_second": 2.712
61
  }
62
  ],
63
  "logging_steps": 500,
64
- "max_steps": 3190,
65
  "num_input_tokens_seen": 0,
66
- "num_train_epochs": 10,
67
  "save_steps": 500,
68
  "stateful_callbacks": {
69
  "TrainerControl": {
@@ -77,7 +35,7 @@
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 1.000225898496e+16,
81
  "train_batch_size": 8,
82
  "trial_name": null,
83
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 319,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "step": 319,
14
+ "total_flos": 1000225898496000.0,
15
+ "train_loss": 3.8128880228742164,
16
+ "train_runtime": 119.3474,
17
+ "train_samples_per_second": 21.383,
18
+ "train_steps_per_second": 2.673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 319,
23
  "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 1,
25
  "save_steps": 500,
26
  "stateful_callbacks": {
27
  "TrainerControl": {
 
35
  "attributes": {}
36
  }
37
  },
38
+ "total_flos": 1000225898496000.0,
39
  "train_batch_size": 8,
40
  "trial_name": null,
41
  "trial_params": null