nlparabic committed on
Commit
c5f2c1c
1 Parent(s): f093fe8

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -0
  2. all_results.json +12 -12
  3. eval_results.json +7 -7
  4. train_results.json +6 -6
  5. trainer_state.json +75 -12
README.md CHANGED
@@ -3,6 +3,8 @@ license: apache-2.0
3
  base_model: riotu-lab/ArabianGPT-01B
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: test-aragpt
8
  results: []
@@ -14,6 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
14
  # test-aragpt
15
 
16
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
 
 
 
17
 
18
  ## Model description
19
 
 
3
  base_model: riotu-lab/ArabianGPT-01B
4
  tags:
5
  - generated_from_trainer
6
+ metrics:
7
+ - bleu
8
  model-index:
9
  - name: test-aragpt
10
  results: []
 
16
  # test-aragpt
17
 
18
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 2.2835
21
+ - Bleu: 0.2080
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_bleu": 0.10603800831832183,
4
- "eval_loss": 3.326219081878662,
5
- "eval_runtime": 18.595,
6
  "eval_samples": 847,
7
- "eval_samples_per_second": 45.55,
8
- "eval_steps_per_second": 5.7,
9
- "perplexity": 27.832908563445223,
10
- "total_flos": 1000225898496000.0,
11
- "train_loss": 3.8128880228742164,
12
- "train_runtime": 119.3474,
13
  "train_samples": 2552,
14
- "train_samples_per_second": 21.383,
15
- "train_steps_per_second": 2.673
16
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_bleu": 0.20804732558419864,
4
+ "eval_loss": 2.2835311889648438,
5
+ "eval_runtime": 19.725,
6
  "eval_samples": 847,
7
+ "eval_samples_per_second": 42.94,
8
+ "eval_steps_per_second": 5.374,
9
+ "perplexity": 9.811264741472648,
10
+ "total_flos": 1.500338847744e+16,
11
+ "train_loss": 2.280068150184496,
12
+ "train_runtime": 1782.6608,
13
  "train_samples": 2552,
14
+ "train_samples_per_second": 21.474,
15
+ "train_steps_per_second": 2.684
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_bleu": 0.10603800831832183,
4
- "eval_loss": 3.326219081878662,
5
- "eval_runtime": 18.595,
6
  "eval_samples": 847,
7
- "eval_samples_per_second": 45.55,
8
- "eval_steps_per_second": 5.7,
9
- "perplexity": 27.832908563445223
10
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_bleu": 0.20804732558419864,
4
+ "eval_loss": 2.2835311889648438,
5
+ "eval_runtime": 19.725,
6
  "eval_samples": 847,
7
+ "eval_samples_per_second": 42.94,
8
+ "eval_steps_per_second": 5.374,
9
+ "perplexity": 9.811264741472648
10
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 1000225898496000.0,
4
- "train_loss": 3.8128880228742164,
5
- "train_runtime": 119.3474,
6
  "train_samples": 2552,
7
- "train_samples_per_second": 21.383,
8
- "train_steps_per_second": 2.673
9
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "total_flos": 1.500338847744e+16,
4
+ "train_loss": 2.280068150184496,
5
+ "train_runtime": 1782.6608,
6
  "train_samples": 2552,
7
+ "train_samples_per_second": 21.474,
8
+ "train_steps_per_second": 2.684
9
  }
trainer_state.json CHANGED
@@ -1,27 +1,90 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 319,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "step": 319,
14
- "total_flos": 1000225898496000.0,
15
- "train_loss": 3.8128880228742164,
16
- "train_runtime": 119.3474,
17
- "train_samples_per_second": 21.383,
18
- "train_steps_per_second": 2.673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 319,
23
  "num_input_tokens_seen": 0,
24
- "num_train_epochs": 1,
25
  "save_steps": 500,
26
  "stateful_callbacks": {
27
  "TrainerControl": {
@@ -35,7 +98,7 @@
35
  "attributes": {}
36
  }
37
  },
38
- "total_flos": 1000225898496000.0,
39
  "train_batch_size": 8,
40
  "trial_name": null,
41
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
  "eval_steps": 500,
6
+ "global_step": 4785,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.567398119122257,
13
+ "grad_norm": 1.1618669033050537,
14
+ "learning_rate": 4.4775339602925815e-05,
15
+ "loss": 3.4578,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 3.134796238244514,
20
+ "grad_norm": 1.134594440460205,
21
+ "learning_rate": 3.955067920585162e-05,
22
+ "loss": 2.7316,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 4.702194357366771,
27
+ "grad_norm": 1.16652250289917,
28
+ "learning_rate": 3.4326018808777435e-05,
29
+ "loss": 2.4393,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 6.269592476489028,
34
+ "grad_norm": 1.1664645671844482,
35
+ "learning_rate": 2.9101358411703238e-05,
36
+ "loss": 2.2559,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 7.836990595611285,
41
+ "grad_norm": 1.1747838258743286,
42
+ "learning_rate": 2.387669801462905e-05,
43
+ "loss": 2.127,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 9.404388714733543,
48
+ "grad_norm": 1.2233911752700806,
49
+ "learning_rate": 1.865203761755486e-05,
50
+ "loss": 2.0242,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 10.971786833855798,
55
+ "grad_norm": 1.149554967880249,
56
+ "learning_rate": 1.3427377220480669e-05,
57
+ "loss": 1.9585,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 12.539184952978056,
62
+ "grad_norm": 1.1873332262039185,
63
+ "learning_rate": 8.202716823406478e-06,
64
+ "loss": 1.9004,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 14.106583072100314,
69
+ "grad_norm": 1.1350129842758179,
70
+ "learning_rate": 2.9780564263322885e-06,
71
+ "loss": 1.8692,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 15.0,
76
+ "step": 4785,
77
+ "total_flos": 1.500338847744e+16,
78
+ "train_loss": 2.280068150184496,
79
+ "train_runtime": 1782.6608,
80
+ "train_samples_per_second": 21.474,
81
+ "train_steps_per_second": 2.684
82
  }
83
  ],
84
  "logging_steps": 500,
85
+ "max_steps": 4785,
86
  "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 15,
88
  "save_steps": 500,
89
  "stateful_callbacks": {
90
  "TrainerControl": {
 
98
  "attributes": {}
99
  }
100
  },
101
+ "total_flos": 1.500338847744e+16,
102
  "train_batch_size": 8,
103
  "trial_name": null,
104
  "trial_params": null