xiaoming-leza committed on
Commit
40b95f8
1 Parent(s): cff9c11

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_loss": 0.3827837407588959,
4
- "eval_runtime": 112.0874,
5
  "eval_samples": 1647,
6
- "eval_samples_per_second": 14.694,
7
- "eval_steps_per_second": 1.838,
8
- "eval_wer": 0.34950464712491064,
9
- "train_loss": 1.0725414783582774,
10
- "train_runtime": 6951.8597,
11
  "train_samples": 3478,
12
- "train_samples_per_second": 7.504,
13
- "train_steps_per_second": 0.235
14
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_loss": 0.3709133565425873,
4
+ "eval_runtime": 115.3267,
5
  "eval_samples": 1647,
6
+ "eval_samples_per_second": 14.281,
7
+ "eval_steps_per_second": 1.786,
8
+ "eval_wer": 0.3428658972525789,
9
+ "train_loss": 1.0641350445761957,
10
+ "train_runtime": 6976.4654,
11
  "train_samples": 3478,
12
+ "train_samples_per_second": 7.478,
13
+ "train_steps_per_second": 0.234
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_loss": 0.3827837407588959,
4
- "eval_runtime": 112.0874,
5
  "eval_samples": 1647,
6
- "eval_samples_per_second": 14.694,
7
- "eval_steps_per_second": 1.838,
8
- "eval_wer": 0.34950464712491064
9
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_loss": 0.3709133565425873,
4
+ "eval_runtime": 115.3267,
5
  "eval_samples": 1647,
6
+ "eval_samples_per_second": 14.281,
7
+ "eval_steps_per_second": 1.786,
8
+ "eval_wer": 0.3428658972525789
9
  }
runs/Jul26_07-42-20_8b1495053dbb/events.out.tfevents.1690365097.8b1495053dbb.1511.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8db96e9cbabca7391a632d90e73681ba7f2fc69d97c0b5db0b7d74e48eeff4a
3
+ size 406
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
- "train_loss": 1.0725414783582774,
4
- "train_runtime": 6951.8597,
5
  "train_samples": 3478,
6
- "train_samples_per_second": 7.504,
7
- "train_steps_per_second": 0.235
8
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "train_loss": 1.0641350445761957,
4
+ "train_runtime": 6976.4654,
5
  "train_samples": 3478,
6
+ "train_samples_per_second": 7.478,
7
+ "train_steps_per_second": 0.234
8
  }
trainer_state.json CHANGED
@@ -9,174 +9,174 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.92,
12
- "eval_loss": 3.5826058387756348,
13
- "eval_runtime": 112.5008,
14
- "eval_samples_per_second": 14.64,
15
- "eval_steps_per_second": 1.831,
16
  "eval_wer": 1.0,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 1.83,
21
- "eval_loss": 3.0217819213867188,
22
- "eval_runtime": 113.1278,
23
- "eval_samples_per_second": 14.559,
24
- "eval_steps_per_second": 1.821,
25
  "eval_wer": 0.9998978653865795,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.75,
30
- "eval_loss": 0.8984940052032471,
31
- "eval_runtime": 111.3066,
32
- "eval_samples_per_second": 14.797,
33
- "eval_steps_per_second": 1.851,
34
- "eval_wer": 0.8035951383924012,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 3.67,
39
- "eval_loss": 0.5992483496665955,
40
- "eval_runtime": 112.2673,
41
- "eval_samples_per_second": 14.67,
42
- "eval_steps_per_second": 1.835,
43
- "eval_wer": 0.619650699622102,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 4.59,
48
  "learning_rate": 0.00029939999999999996,
49
- "loss": 3.1629,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 4.59,
54
- "eval_loss": 0.49684980511665344,
55
- "eval_runtime": 112.6664,
56
- "eval_samples_per_second": 14.618,
57
- "eval_steps_per_second": 1.828,
58
- "eval_wer": 0.5339597589623123,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 5.5,
63
- "eval_loss": 0.4645964503288269,
64
- "eval_runtime": 111.3096,
65
- "eval_samples_per_second": 14.797,
66
- "eval_steps_per_second": 1.851,
67
- "eval_wer": 0.5045449902972117,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 6.42,
72
- "eval_loss": 0.43159034848213196,
73
- "eval_runtime": 113.1768,
74
- "eval_samples_per_second": 14.552,
75
- "eval_steps_per_second": 1.82,
76
- "eval_wer": 0.44254927995097537,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 7.34,
81
- "eval_loss": 0.44998037815093994,
82
- "eval_runtime": 111.8678,
83
- "eval_samples_per_second": 14.723,
84
- "eval_steps_per_second": 1.841,
85
- "eval_wer": 0.4734960678173833,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 8.26,
90
- "eval_loss": 0.41139060258865356,
91
- "eval_runtime": 113.568,
92
- "eval_samples_per_second": 14.502,
93
- "eval_steps_per_second": 1.814,
94
- "eval_wer": 0.41231743437851087,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 9.17,
99
  "learning_rate": 0.00016810572687224668,
100
- "loss": 0.2226,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 9.17,
105
- "eval_loss": 0.41622716188430786,
106
- "eval_runtime": 112.362,
107
- "eval_samples_per_second": 14.658,
108
- "eval_steps_per_second": 1.833,
109
- "eval_wer": 0.4018997038096211,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 10.09,
114
- "eval_loss": 0.3999302089214325,
115
- "eval_runtime": 113.8252,
116
- "eval_samples_per_second": 14.47,
117
- "eval_steps_per_second": 1.81,
118
- "eval_wer": 0.38239199264630785,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 11.01,
123
- "eval_loss": 0.404796838760376,
124
- "eval_runtime": 112.9341,
125
- "eval_samples_per_second": 14.584,
126
- "eval_steps_per_second": 1.824,
127
- "eval_wer": 0.38423041568787664,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 11.93,
132
- "eval_loss": 0.3789042532444,
133
- "eval_runtime": 112.9251,
134
- "eval_samples_per_second": 14.585,
135
- "eval_steps_per_second": 1.824,
136
- "eval_wer": 0.3602287815340619,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 12.84,
141
- "eval_loss": 0.4024040102958679,
142
- "eval_runtime": 114.3571,
143
- "eval_samples_per_second": 14.402,
144
- "eval_steps_per_second": 1.801,
145
- "eval_wer": 0.35359003166173014,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 13.76,
150
  "learning_rate": 3.594713656387665e-05,
151
- "loss": 0.1015,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 13.76,
156
- "eval_loss": 0.38988158106803894,
157
- "eval_runtime": 113.4392,
158
- "eval_samples_per_second": 14.519,
159
- "eval_steps_per_second": 1.816,
160
- "eval_wer": 0.35747114697170873,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 14.68,
165
- "eval_loss": 0.3801647126674652,
166
- "eval_runtime": 112.3523,
167
- "eval_samples_per_second": 14.659,
168
- "eval_steps_per_second": 1.834,
169
- "eval_wer": 0.3489939740578082,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 15.0,
174
  "step": 1635,
175
  "total_flos": 6.468063251673315e+18,
176
- "train_loss": 1.0725414783582774,
177
- "train_runtime": 6951.8597,
178
- "train_samples_per_second": 7.504,
179
- "train_steps_per_second": 0.235
180
  }
181
  ],
182
  "max_steps": 1635,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.92,
12
+ "eval_loss": 3.5988094806671143,
13
+ "eval_runtime": 113.2606,
14
+ "eval_samples_per_second": 14.542,
15
+ "eval_steps_per_second": 1.819,
16
  "eval_wer": 1.0,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 1.83,
21
+ "eval_loss": 3.008251905441284,
22
+ "eval_runtime": 111.5132,
23
+ "eval_samples_per_second": 14.77,
24
+ "eval_steps_per_second": 1.847,
25
  "eval_wer": 0.9998978653865795,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.75,
30
+ "eval_loss": 0.8641791939735413,
31
+ "eval_runtime": 113.049,
32
+ "eval_samples_per_second": 14.569,
33
+ "eval_steps_per_second": 1.822,
34
+ "eval_wer": 0.757940966193443,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 3.67,
39
+ "eval_loss": 0.5713350176811218,
40
+ "eval_runtime": 113.001,
41
+ "eval_samples_per_second": 14.575,
42
+ "eval_steps_per_second": 1.823,
43
+ "eval_wer": 0.6202635073026248,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 4.59,
48
  "learning_rate": 0.00029939999999999996,
49
+ "loss": 3.14,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 4.59,
54
+ "eval_loss": 0.47950074076652527,
55
+ "eval_runtime": 113.7593,
56
+ "eval_samples_per_second": 14.478,
57
+ "eval_steps_per_second": 1.811,
58
+ "eval_wer": 0.5337554897354714,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 5.5,
63
+ "eval_loss": 0.4440889060497284,
64
+ "eval_runtime": 114.5305,
65
+ "eval_samples_per_second": 14.38,
66
+ "eval_steps_per_second": 1.799,
67
+ "eval_wer": 0.49116535593912775,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 6.42,
72
+ "eval_loss": 0.4241042733192444,
73
+ "eval_runtime": 115.6017,
74
+ "eval_samples_per_second": 14.247,
75
+ "eval_steps_per_second": 1.782,
76
+ "eval_wer": 0.4521499336125013,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 7.34,
81
+ "eval_loss": 0.4326329827308655,
82
+ "eval_runtime": 113.9718,
83
+ "eval_samples_per_second": 14.451,
84
+ "eval_steps_per_second": 1.807,
85
+ "eval_wer": 0.4611377795935042,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 8.26,
90
+ "eval_loss": 0.39125096797943115,
91
+ "eval_runtime": 115.5103,
92
+ "eval_samples_per_second": 14.258,
93
+ "eval_steps_per_second": 1.783,
94
+ "eval_wer": 0.42120314574609335,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 9.17,
99
  "learning_rate": 0.00016810572687224668,
100
+ "loss": 0.2183,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 9.17,
105
+ "eval_loss": 0.40361154079437256,
106
+ "eval_runtime": 114.1472,
107
+ "eval_samples_per_second": 14.429,
108
+ "eval_steps_per_second": 1.805,
109
+ "eval_wer": 0.39730364620569913,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 10.09,
114
+ "eval_loss": 0.40348684787750244,
115
+ "eval_runtime": 113.8647,
116
+ "eval_samples_per_second": 14.465,
117
+ "eval_steps_per_second": 1.809,
118
+ "eval_wer": 0.3958737616178123,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 11.01,
123
+ "eval_loss": 0.38071152567863464,
124
+ "eval_runtime": 112.8863,
125
+ "eval_samples_per_second": 14.59,
126
+ "eval_steps_per_second": 1.825,
127
+ "eval_wer": 0.3790215504034317,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 11.93,
132
+ "eval_loss": 0.3750300109386444,
133
+ "eval_runtime": 113.6392,
134
+ "eval_samples_per_second": 14.493,
135
+ "eval_steps_per_second": 1.813,
136
+ "eval_wer": 0.36502910836482483,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 12.84,
141
+ "eval_loss": 0.38215482234954834,
142
+ "eval_runtime": 114.233,
143
+ "eval_samples_per_second": 14.418,
144
+ "eval_steps_per_second": 1.803,
145
+ "eval_wer": 0.35726687774486776,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 13.76,
150
  "learning_rate": 3.594713656387665e-05,
151
+ "loss": 0.1011,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 13.76,
156
+ "eval_loss": 0.37470725178718567,
157
+ "eval_runtime": 113.9728,
158
+ "eval_samples_per_second": 14.451,
159
+ "eval_steps_per_second": 1.807,
160
+ "eval_wer": 0.35103666632621794,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 14.68,
165
+ "eval_loss": 0.37135428190231323,
166
+ "eval_runtime": 113.4186,
167
+ "eval_samples_per_second": 14.521,
168
+ "eval_steps_per_second": 1.816,
169
+ "eval_wer": 0.3454192625880911,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 15.0,
174
  "step": 1635,
175
  "total_flos": 6.468063251673315e+18,
176
+ "train_loss": 1.0641350445761957,
177
+ "train_runtime": 6976.4654,
178
+ "train_samples_per_second": 7.478,
179
+ "train_steps_per_second": 0.234
180
  }
181
  ],
182
  "max_steps": 1635,