gokuls commited on
Commit
b8c366d
1 Parent(s): 4821767

Training in progress, epoch 13

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_loss": 0.6131083965301514,
4
+ "eval_matthews_correlation": 0.0,
5
+ "eval_runtime": 1.6031,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 650.606,
8
+ "eval_steps_per_second": 3.119,
9
+ "train_loss": 0.5614397408915501,
10
+ "train_runtime": 608.6002,
11
+ "train_samples": 8551,
12
+ "train_samples_per_second": 702.514,
13
+ "train_steps_per_second": 2.793
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_loss": 0.6131083965301514,
4
+ "eval_matthews_correlation": 0.0,
5
+ "eval_runtime": 1.6031,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 650.606,
8
+ "eval_steps_per_second": 3.119
9
+ }
logs/1674298259.6394777/events.out.tfevents.1674298259.gifu.1218844.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b51650b3f5fbcf0d2bb2532709ac86f6fdb2846a362fdd74e4bbfecf9fe4e1d0
3
+ size 5575
logs/1674299037.2365105/events.out.tfevents.1674299037.serv-3331.1578155.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0102f89ce7bc0b697414b4034bb4750faecc792d00864614dc78fb710cf4975
3
+ size 5575
logs/events.out.tfevents.1674211481.serv-3329.596619.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0ffe5a319db96fba78d3b0863616e6df15710e9dc156b53524948801fff770f
3
+ size 375
logs/events.out.tfevents.1674298259.gifu.1218844.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55c8304c10148085e0a31d198b247a46d3380780bb3a6196eafb02d9184fe52a
3
+ size 4047
logs/events.out.tfevents.1674299037.serv-3331.1578155.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ac5e832cfbd1b7fe5324bfdee753360a8d1e14fee7e1afab159ff670d1b08bb
3
+ size 4539
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8175bfc3e61cb3e4e31b0cdcb2f23c51ecad64b4bb6d60f61a3622eb1eef4cd0
3
  size 99795697
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94a41abe9843609cdf3f9a87acc8ea27193651e472bb7a0ee9a2c64c1b5715b
3
  size 99795697
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "train_loss": 0.5614397408915501,
4
+ "train_runtime": 608.6002,
5
+ "train_samples": 8551,
6
+ "train_samples_per_second": 702.514,
7
+ "train_steps_per_second": 2.793
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6131083965301514,
3
+ "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_cola/checkpoint-238",
4
+ "epoch": 12.0,
5
+ "global_step": 408,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 4.9e-05,
13
+ "loss": 0.6197,
14
+ "step": 34
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_loss": 0.6238651871681213,
19
+ "eval_matthews_correlation": 0.0,
20
+ "eval_runtime": 1.5899,
21
+ "eval_samples_per_second": 656.009,
22
+ "eval_steps_per_second": 3.145,
23
+ "step": 34
24
+ },
25
+ {
26
+ "epoch": 2.0,
27
+ "learning_rate": 4.8e-05,
28
+ "loss": 0.6078,
29
+ "step": 68
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_loss": 0.6178815364837646,
34
+ "eval_matthews_correlation": 0.0,
35
+ "eval_runtime": 1.5812,
36
+ "eval_samples_per_second": 659.606,
37
+ "eval_steps_per_second": 3.162,
38
+ "step": 68
39
+ },
40
+ {
41
+ "epoch": 3.0,
42
+ "learning_rate": 4.7e-05,
43
+ "loss": 0.6064,
44
+ "step": 102
45
+ },
46
+ {
47
+ "epoch": 3.0,
48
+ "eval_loss": 0.6179934144020081,
49
+ "eval_matthews_correlation": 0.0,
50
+ "eval_runtime": 2.4438,
51
+ "eval_samples_per_second": 426.802,
52
+ "eval_steps_per_second": 2.046,
53
+ "step": 102
54
+ },
55
+ {
56
+ "epoch": 4.0,
57
+ "learning_rate": 4.600000000000001e-05,
58
+ "loss": 0.6073,
59
+ "step": 136
60
+ },
61
+ {
62
+ "epoch": 4.0,
63
+ "eval_loss": 0.6175711750984192,
64
+ "eval_matthews_correlation": 0.0,
65
+ "eval_runtime": 1.7323,
66
+ "eval_samples_per_second": 602.096,
67
+ "eval_steps_per_second": 2.886,
68
+ "step": 136
69
+ },
70
+ {
71
+ "epoch": 5.0,
72
+ "learning_rate": 4.5e-05,
73
+ "loss": 0.6069,
74
+ "step": 170
75
+ },
76
+ {
77
+ "epoch": 5.0,
78
+ "eval_loss": 0.6172661781311035,
79
+ "eval_matthews_correlation": 0.0,
80
+ "eval_runtime": 1.7156,
81
+ "eval_samples_per_second": 607.942,
82
+ "eval_steps_per_second": 2.914,
83
+ "step": 170
84
+ },
85
+ {
86
+ "epoch": 6.0,
87
+ "learning_rate": 4.4000000000000006e-05,
88
+ "loss": 0.6043,
89
+ "step": 204
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "eval_loss": 0.6166184544563293,
94
+ "eval_matthews_correlation": 0.0,
95
+ "eval_runtime": 1.9408,
96
+ "eval_samples_per_second": 537.405,
97
+ "eval_steps_per_second": 2.576,
98
+ "step": 204
99
+ },
100
+ {
101
+ "epoch": 7.0,
102
+ "learning_rate": 4.3e-05,
103
+ "loss": 0.6004,
104
+ "step": 238
105
+ },
106
+ {
107
+ "epoch": 7.0,
108
+ "eval_loss": 0.6131083965301514,
109
+ "eval_matthews_correlation": 0.0,
110
+ "eval_runtime": 1.7507,
111
+ "eval_samples_per_second": 595.751,
112
+ "eval_steps_per_second": 2.856,
113
+ "step": 238
114
+ },
115
+ {
116
+ "epoch": 8.0,
117
+ "learning_rate": 4.2e-05,
118
+ "loss": 0.5842,
119
+ "step": 272
120
+ },
121
+ {
122
+ "epoch": 8.0,
123
+ "eval_loss": 0.6240708231925964,
124
+ "eval_matthews_correlation": 0.0951039122870703,
125
+ "eval_runtime": 1.7156,
126
+ "eval_samples_per_second": 607.967,
127
+ "eval_steps_per_second": 2.915,
128
+ "step": 272
129
+ },
130
+ {
131
+ "epoch": 9.0,
132
+ "learning_rate": 4.1e-05,
133
+ "loss": 0.5192,
134
+ "step": 306
135
+ },
136
+ {
137
+ "epoch": 9.0,
138
+ "eval_loss": 0.6361746191978455,
139
+ "eval_matthews_correlation": 0.059760920069176514,
140
+ "eval_runtime": 1.9833,
141
+ "eval_samples_per_second": 525.879,
142
+ "eval_steps_per_second": 2.521,
143
+ "step": 306
144
+ },
145
+ {
146
+ "epoch": 10.0,
147
+ "learning_rate": 4e-05,
148
+ "loss": 0.4884,
149
+ "step": 340
150
+ },
151
+ {
152
+ "epoch": 10.0,
153
+ "eval_loss": 0.7009902596473694,
154
+ "eval_matthews_correlation": 0.08008155523655092,
155
+ "eval_runtime": 1.7208,
156
+ "eval_samples_per_second": 606.103,
157
+ "eval_steps_per_second": 2.906,
158
+ "step": 340
159
+ },
160
+ {
161
+ "epoch": 11.0,
162
+ "learning_rate": 3.9000000000000006e-05,
163
+ "loss": 0.4559,
164
+ "step": 374
165
+ },
166
+ {
167
+ "epoch": 11.0,
168
+ "eval_loss": 0.6731011867523193,
169
+ "eval_matthews_correlation": 0.09051190856095573,
170
+ "eval_runtime": 1.7848,
171
+ "eval_samples_per_second": 584.391,
172
+ "eval_steps_per_second": 2.801,
173
+ "step": 374
174
+ },
175
+ {
176
+ "epoch": 12.0,
177
+ "learning_rate": 3.8e-05,
178
+ "loss": 0.4367,
179
+ "step": 408
180
+ },
181
+ {
182
+ "epoch": 12.0,
183
+ "eval_loss": 0.6893478035926819,
184
+ "eval_matthews_correlation": 0.09007205990892461,
185
+ "eval_runtime": 1.959,
186
+ "eval_samples_per_second": 532.427,
187
+ "eval_steps_per_second": 2.552,
188
+ "step": 408
189
+ },
190
+ {
191
+ "epoch": 12.0,
192
+ "step": 408,
193
+ "total_flos": 3258721140473856.0,
194
+ "train_loss": 0.5614397408915501,
195
+ "train_runtime": 608.6002,
196
+ "train_samples_per_second": 702.514,
197
+ "train_steps_per_second": 2.793
198
+ }
199
+ ],
200
+ "max_steps": 1700,
201
+ "num_train_epochs": 50,
202
+ "total_flos": 3258721140473856.0,
203
+ "trial_name": null,
204
+ "trial_params": null
205
+ }