albertmartinez committed on
Commit
472fecb
1 Parent(s): bb68235

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_f1": 0.789972706564468,
4
- "eval_loss": 0.7343637943267822,
5
- "eval_runtime": 2232.9309,
6
  "eval_samples": 12908,
7
- "eval_samples_per_second": 5.781,
8
- "eval_steps_per_second": 0.723,
9
  "total_flos": 2.377533515518771e+16,
10
- "train_loss": 1.0759811563350086,
11
- "train_runtime": 46811.0436,
12
  "train_samples": 30117,
13
- "train_samples_per_second": 1.93,
14
- "train_steps_per_second": 0.06
15
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_f1": 0.8045510528812365,
4
+ "eval_loss": 0.6810711622238159,
5
+ "eval_runtime": 1951.2993,
6
  "eval_samples": 12908,
7
+ "eval_samples_per_second": 6.615,
8
+ "eval_steps_per_second": 0.207,
9
  "total_flos": 2.377533515518771e+16,
10
+ "train_loss": 0.9083002715438119,
11
+ "train_runtime": 52770.4686,
12
  "train_samples": 30117,
13
+ "train_samples_per_second": 1.712,
14
+ "train_steps_per_second": 0.054
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_f1": 0.789972706564468,
4
- "eval_loss": 0.7343637943267822,
5
- "eval_runtime": 2232.9309,
6
  "eval_samples": 12908,
7
- "eval_samples_per_second": 5.781,
8
- "eval_steps_per_second": 0.723
9
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_f1": 0.8045510528812365,
4
+ "eval_loss": 0.6810711622238159,
5
+ "eval_runtime": 1951.2993,
6
  "eval_samples": 12908,
7
+ "eval_samples_per_second": 6.615,
8
+ "eval_steps_per_second": 0.207
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2.377533515518771e+16,
4
- "train_loss": 1.0759811563350086,
5
- "train_runtime": 46811.0436,
6
  "train_samples": 30117,
7
- "train_samples_per_second": 1.93,
8
- "train_steps_per_second": 0.06
9
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2.377533515518771e+16,
4
+ "train_loss": 0.9083002715438119,
5
+ "train_runtime": 52770.4686,
6
  "train_samples": 30117,
7
+ "train_samples_per_second": 1.712,
8
+ "train_steps_per_second": 0.054
9
  }
trainer_state.json CHANGED
@@ -2,55 +2,68 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "eval_steps": 500,
6
  "global_step": 2826,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.5307855626326964,
13
- "grad_norm": 5.489389896392822,
14
- "learning_rate": 8.333333333333334e-06,
15
- "loss": 2.354,
16
- "step": 500
17
  },
18
  {
19
- "epoch": 1.0615711252653928,
20
- "grad_norm": 5.129901885986328,
21
- "learning_rate": 8.203054806828391e-06,
22
- "loss": 1.0543,
23
- "step": 1000
 
 
24
  },
25
  {
26
- "epoch": 1.5923566878980893,
27
- "grad_norm": 9.780948638916016,
28
- "learning_rate": 5.9568733153638815e-06,
29
- "loss": 0.8205,
30
- "step": 1500
31
  },
32
  {
33
- "epoch": 2.1231422505307855,
34
- "grad_norm": 8.44005012512207,
35
- "learning_rate": 3.710691823899371e-06,
36
- "loss": 0.7534,
37
- "step": 2000
 
 
38
  },
39
  {
40
- "epoch": 2.653927813163482,
41
- "grad_norm": 4.482905387878418,
42
- "learning_rate": 1.464510332434861e-06,
43
- "loss": 0.6727,
44
- "step": 2500
 
 
 
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 3.0,
48
  "step": 2826,
49
  "total_flos": 2.377533515518771e+16,
50
- "train_loss": 1.0759811563350086,
51
- "train_runtime": 46811.0436,
52
- "train_samples_per_second": 1.93,
53
- "train_steps_per_second": 0.06
54
  }
55
  ],
56
  "logging_steps": 500,
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "eval_steps": 300.0,
6
  "global_step": 2826,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "grad_norm": 15.699420928955078,
14
+ "learning_rate": 1.6927223719676552e-05,
15
+ "loss": 1.5106,
16
+ "step": 942
17
  },
18
  {
19
+ "epoch": 1.0,
20
+ "eval_f1": 0.7668134917297256,
21
+ "eval_loss": 0.8142930269241333,
22
+ "eval_runtime": 1955.7939,
23
+ "eval_samples_per_second": 6.6,
24
+ "eval_steps_per_second": 0.207,
25
+ "step": 942
26
  },
27
  {
28
+ "epoch": 2.0,
29
+ "grad_norm": 4.08961820602417,
30
+ "learning_rate": 8.463611859838276e-06,
31
+ "loss": 0.7033,
32
+ "step": 1884
33
  },
34
  {
35
+ "epoch": 2.0,
36
+ "eval_f1": 0.7984619452840845,
37
+ "eval_loss": 0.6980345845222473,
38
+ "eval_runtime": 1952.7483,
39
+ "eval_samples_per_second": 6.61,
40
+ "eval_steps_per_second": 0.207,
41
+ "step": 1884
42
  },
43
  {
44
+ "epoch": 3.0,
45
+ "grad_norm": 25.8586483001709,
46
+ "learning_rate": 0.0,
47
+ "loss": 0.511,
48
+ "step": 2826
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_f1": 0.8045510528812365,
53
+ "eval_loss": 0.6810711622238159,
54
+ "eval_runtime": 1950.0401,
55
+ "eval_samples_per_second": 6.619,
56
+ "eval_steps_per_second": 0.207,
57
+ "step": 2826
58
  },
59
  {
60
  "epoch": 3.0,
61
  "step": 2826,
62
  "total_flos": 2.377533515518771e+16,
63
+ "train_loss": 0.9083002715438119,
64
+ "train_runtime": 52770.4686,
65
+ "train_samples_per_second": 1.712,
66
+ "train_steps_per_second": 0.054
67
  }
68
  ],
69
  "logging_steps": 500,