nlparabic commited on
Commit
1325fe5
1 Parent(s): 0dcffab

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_bleu": 0.25109238074220425,
4
+ "eval_loss": 2.5200698375701904,
5
+ "eval_rouge1": 0.4533940426786972,
6
+ "eval_rouge2": 0.21369906854716414,
7
+ "eval_rougeL": 0.39838507960838365,
8
+ "eval_runtime": 28.8656,
9
+ "eval_samples": 884,
10
+ "eval_samples_per_second": 30.625,
11
+ "eval_steps_per_second": 3.845,
12
+ "perplexity": 12.429464676879022,
13
+ "total_flos": 4151799742464000.0,
14
+ "train_loss": 3.1427729219692684,
15
+ "train_runtime": 589.6608,
16
+ "train_samples": 3531,
17
+ "train_samples_per_second": 17.965,
18
+ "train_steps_per_second": 2.249
19
+ }
egy_training_log.txt CHANGED
@@ -146,3 +146,5 @@ WARNING:root:Epoch 2.0: No losses recorded yet.
146
  INFO:absl:Using default tokenizer.
147
  WARNING:root:Epoch 3.0: No losses recorded yet.
148
  INFO:absl:Using default tokenizer.
 
 
 
146
  INFO:absl:Using default tokenizer.
147
  WARNING:root:Epoch 3.0: No losses recorded yet.
148
  INFO:absl:Using default tokenizer.
149
+ INFO:__main__:*** Evaluate ***
150
+ INFO:absl:Using default tokenizer.
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_bleu": 0.25109238074220425,
4
+ "eval_loss": 2.5200698375701904,
5
+ "eval_rouge1": 0.4533940426786972,
6
+ "eval_rouge2": 0.21369906854716414,
7
+ "eval_rougeL": 0.39838507960838365,
8
+ "eval_runtime": 28.8656,
9
+ "eval_samples": 884,
10
+ "eval_samples_per_second": 30.625,
11
+ "eval_steps_per_second": 3.845,
12
+ "perplexity": 12.429464676879022
13
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 4151799742464000.0,
4
+ "train_loss": 3.1427729219692684,
5
+ "train_runtime": 589.6608,
6
+ "train_samples": 3531,
7
+ "train_samples_per_second": 17.965,
8
+ "train_steps_per_second": 2.249
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.5200698375701904,
3
+ "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/nw_egy/checkpoint-1326",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1326,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.6992121934890747,
14
+ "learning_rate": 4.4200000000000004e-05,
15
+ "loss": 4.0723,
16
+ "step": 442
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_bleu": 0.22275841970876573,
21
+ "eval_loss": 2.958087921142578,
22
+ "eval_rouge1": 0.3912678429308313,
23
+ "eval_rouge2": 0.16481568948149655,
24
+ "eval_rougeL": 0.32008248461173605,
25
+ "eval_runtime": 28.7079,
26
+ "eval_samples_per_second": 30.793,
27
+ "eval_steps_per_second": 3.867,
28
+ "step": 442
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 1.676450252532959,
33
+ "learning_rate": 2.6755447941888623e-05,
34
+ "loss": 2.8055,
35
+ "step": 884
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_bleu": 0.24154624358915514,
40
+ "eval_loss": 2.606330633163452,
41
+ "eval_rouge1": 0.43470628711649895,
42
+ "eval_rouge2": 0.19710102219495218,
43
+ "eval_rougeL": 0.3776869994743499,
44
+ "eval_runtime": 28.9632,
45
+ "eval_samples_per_second": 30.521,
46
+ "eval_steps_per_second": 3.832,
47
+ "step": 884
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 1.5551148653030396,
52
+ "learning_rate": 0.0,
53
+ "loss": 2.5505,
54
+ "step": 1326
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_bleu": 0.25109238074220425,
59
+ "eval_loss": 2.5200698375701904,
60
+ "eval_rouge1": 0.4533940426786972,
61
+ "eval_rouge2": 0.21369906854716414,
62
+ "eval_rougeL": 0.39838507960838365,
63
+ "eval_runtime": 29.2852,
64
+ "eval_samples_per_second": 30.186,
65
+ "eval_steps_per_second": 3.79,
66
+ "step": 1326
67
+ },
68
+ {
69
+ "epoch": 3.0,
70
+ "step": 1326,
71
+ "total_flos": 4151799742464000.0,
72
+ "train_loss": 3.1427729219692684,
73
+ "train_runtime": 589.6608,
74
+ "train_samples_per_second": 17.965,
75
+ "train_steps_per_second": 2.249
76
+ }
77
+ ],
78
+ "logging_steps": 500,
79
+ "max_steps": 1326,
80
+ "num_input_tokens_seen": 0,
81
+ "num_train_epochs": 3,
82
+ "save_steps": 500,
83
+ "stateful_callbacks": {
84
+ "EarlyStoppingCallback": {
85
+ "args": {
86
+ "early_stopping_patience": 3,
87
+ "early_stopping_threshold": 0.0
88
+ },
89
+ "attributes": {
90
+ "early_stopping_patience_counter": 0
91
+ }
92
+ },
93
+ "TrainerControl": {
94
+ "args": {
95
+ "should_epoch_stop": false,
96
+ "should_evaluate": false,
97
+ "should_log": false,
98
+ "should_save": true,
99
+ "should_training_stop": true
100
+ },
101
+ "attributes": {}
102
+ }
103
+ },
104
+ "total_flos": 4151799742464000.0,
105
+ "train_batch_size": 8,
106
+ "trial_name": null,
107
+ "trial_params": null
108
+ }