anhng94 commited on
Commit
2354f15
1 Parent(s): 915ed25

Training in progress, step 10

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
 
 
24
  "v_proj",
25
- "q_proj",
26
  "down_proj",
27
- "o_proj",
28
- "up_proj",
29
- "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "up_proj",
24
+ "gate_proj",
25
+ "o_proj",
26
  "v_proj",
 
27
  "down_proj",
28
+ "q_proj",
29
+ "k_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4d210f635eac44e942700b41347f4eb64ae38698cc6c694f1f45bcf575e5762
3
  size 323014560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf7264f1b7d1fe326fc6176263327445742f9a35ffd19d0c04df5344f7f1fab
3
  size 323014560
trainer_state.json CHANGED
@@ -10,201 +10,201 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.03686635944700461,
13
- "grad_norm": 0.28515195710660185,
14
  "learning_rate": 0.0,
15
  "loss": 1.4734,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.07373271889400922,
20
- "grad_norm": 0.26537944818973397,
21
  "learning_rate": 0.0003,
22
- "loss": 1.2799,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.11059907834101383,
27
- "grad_norm": 0.32456262433064903,
28
  "learning_rate": 0.0003,
29
- "loss": 1.4609,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.14746543778801843,
34
- "grad_norm": 0.3941851070383984,
35
  "learning_rate": 0.0003,
36
- "loss": 1.1246,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.18433179723502305,
41
- "grad_norm": 0.14354429714527142,
42
  "learning_rate": 0.0003,
43
- "loss": 1.0118,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.22119815668202766,
48
- "grad_norm": 0.15244360645214902,
49
  "learning_rate": 0.0003,
50
- "loss": 1.0724,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.25806451612903225,
55
- "grad_norm": 0.17439504736452185,
56
  "learning_rate": 0.0003,
57
- "loss": 0.8952,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.29493087557603687,
62
- "grad_norm": 1.8871020983461488,
63
  "learning_rate": 0.0003,
64
- "loss": 0.9702,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.3317972350230415,
69
- "grad_norm": 0.34464704757071946,
70
  "learning_rate": 0.0003,
71
- "loss": 0.9233,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.3686635944700461,
76
- "grad_norm": 0.20938281732289094,
77
  "learning_rate": 0.0003,
78
- "loss": 0.8031,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.4055299539170507,
83
- "grad_norm": 0.20691041179399886,
84
  "learning_rate": 0.0003,
85
- "loss": 0.8318,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.4423963133640553,
90
- "grad_norm": 0.16746745456565448,
91
  "learning_rate": 0.0003,
92
- "loss": 0.7613,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.4792626728110599,
97
- "grad_norm": 0.16864034661331284,
98
  "learning_rate": 0.0003,
99
- "loss": 0.7453,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.5161290322580645,
104
- "grad_norm": 0.09470883605671848,
105
  "learning_rate": 0.0003,
106
- "loss": 0.7272,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.5529953917050692,
111
- "grad_norm": 0.10835665836968225,
112
  "learning_rate": 0.0003,
113
- "loss": 0.7714,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.5898617511520737,
118
- "grad_norm": 0.13018771502313373,
119
  "learning_rate": 0.0003,
120
- "loss": 0.755,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.6267281105990783,
125
- "grad_norm": 0.12000422783438623,
126
  "learning_rate": 0.0003,
127
- "loss": 0.7601,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.663594470046083,
132
- "grad_norm": 0.10710145117472845,
133
  "learning_rate": 0.0003,
134
- "loss": 0.7243,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.7004608294930875,
139
- "grad_norm": 0.10199222794197009,
140
  "learning_rate": 0.0003,
141
- "loss": 0.7617,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.7373271889400922,
146
- "grad_norm": 0.10841261085434695,
147
  "learning_rate": 0.0003,
148
- "loss": 0.7061,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.7741935483870968,
153
- "grad_norm": 0.10854446791930876,
154
  "learning_rate": 0.0003,
155
- "loss": 0.6664,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 0.8110599078341014,
160
- "grad_norm": 0.12871352769383354,
161
  "learning_rate": 0.0003,
162
- "loss": 0.7521,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 0.847926267281106,
167
- "grad_norm": 0.09262524917386825,
168
  "learning_rate": 0.0003,
169
- "loss": 0.7143,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 0.8847926267281107,
174
- "grad_norm": 0.1007189769028823,
175
  "learning_rate": 0.0003,
176
- "loss": 0.7151,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 0.9216589861751152,
181
- "grad_norm": 0.11039499658777108,
182
  "learning_rate": 0.0003,
183
- "loss": 0.668,
184
  "step": 25
185
  },
186
  {
187
  "epoch": 0.9585253456221198,
188
- "grad_norm": 0.09552065085357173,
189
  "learning_rate": 0.0003,
190
- "loss": 0.6846,
191
  "step": 26
192
  },
193
  {
194
  "epoch": 0.9953917050691244,
195
- "grad_norm": 0.09742131147780461,
196
  "learning_rate": 0.0003,
197
- "loss": 0.6828,
198
  "step": 27
199
  },
200
  {
201
  "epoch": 0.9953917050691244,
202
  "step": 27,
203
  "total_flos": 5728527974400.0,
204
- "train_loss": 0.8682256230601558,
205
- "train_runtime": 1037.3465,
206
- "train_samples_per_second": 0.837,
207
- "train_steps_per_second": 0.026
208
  }
209
  ],
210
  "logging_steps": 1.0,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.03686635944700461,
13
+ "grad_norm": 0.2873728095047169,
14
  "learning_rate": 0.0,
15
  "loss": 1.4734,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.07373271889400922,
20
+ "grad_norm": 0.27419596643661837,
21
  "learning_rate": 0.0003,
22
+ "loss": 1.279,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.11059907834101383,
27
+ "grad_norm": 0.3351799319426796,
28
  "learning_rate": 0.0003,
29
+ "loss": 1.4582,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.14746543778801843,
34
+ "grad_norm": 0.2789893810816952,
35
  "learning_rate": 0.0003,
36
+ "loss": 1.1216,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.18433179723502305,
41
+ "grad_norm": 0.14190144952821382,
42
  "learning_rate": 0.0003,
43
+ "loss": 1.0121,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.22119815668202766,
48
+ "grad_norm": 0.1542021738719797,
49
  "learning_rate": 0.0003,
50
+ "loss": 1.0723,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.25806451612903225,
55
+ "grad_norm": 0.17476255091645262,
56
  "learning_rate": 0.0003,
57
+ "loss": 0.8962,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.29493087557603687,
62
+ "grad_norm": 0.30183805297227384,
63
  "learning_rate": 0.0003,
64
+ "loss": 0.9577,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.3317972350230415,
69
+ "grad_norm": 0.2945209326852545,
70
  "learning_rate": 0.0003,
71
+ "loss": 0.9143,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.3686635944700461,
76
+ "grad_norm": 0.21480966699076806,
77
  "learning_rate": 0.0003,
78
+ "loss": 0.7952,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.4055299539170507,
83
+ "grad_norm": 0.18078986894945484,
84
  "learning_rate": 0.0003,
85
+ "loss": 0.8234,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.4423963133640553,
90
+ "grad_norm": 0.15453708977718567,
91
  "learning_rate": 0.0003,
92
+ "loss": 0.7589,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.4792626728110599,
97
+ "grad_norm": 0.1631172234239537,
98
  "learning_rate": 0.0003,
99
+ "loss": 0.7419,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.5161290322580645,
104
+ "grad_norm": 0.09781085387100458,
105
  "learning_rate": 0.0003,
106
+ "loss": 0.7239,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.5529953917050692,
111
+ "grad_norm": 0.09897379010199117,
112
  "learning_rate": 0.0003,
113
+ "loss": 0.7673,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.5898617511520737,
118
+ "grad_norm": 0.11558640849854486,
119
  "learning_rate": 0.0003,
120
+ "loss": 0.7533,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.6267281105990783,
125
+ "grad_norm": 0.11345769354838794,
126
  "learning_rate": 0.0003,
127
+ "loss": 0.7581,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.663594470046083,
132
+ "grad_norm": 0.1013501193678853,
133
  "learning_rate": 0.0003,
134
+ "loss": 0.7224,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.7004608294930875,
139
+ "grad_norm": 0.09930580785134363,
140
  "learning_rate": 0.0003,
141
+ "loss": 0.7597,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.7373271889400922,
146
+ "grad_norm": 0.10206714996240562,
147
  "learning_rate": 0.0003,
148
+ "loss": 0.704,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.7741935483870968,
153
+ "grad_norm": 0.10775281367207125,
154
  "learning_rate": 0.0003,
155
+ "loss": 0.6639,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 0.8110599078341014,
160
+ "grad_norm": 0.12015377273414085,
161
  "learning_rate": 0.0003,
162
+ "loss": 0.7494,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 0.847926267281106,
167
+ "grad_norm": 0.08770642908913276,
168
  "learning_rate": 0.0003,
169
+ "loss": 0.7115,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 0.8847926267281107,
174
+ "grad_norm": 0.135245894998221,
175
  "learning_rate": 0.0003,
176
+ "loss": 0.7169,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 0.9216589861751152,
181
+ "grad_norm": 0.0993611544536447,
182
  "learning_rate": 0.0003,
183
+ "loss": 0.6667,
184
  "step": 25
185
  },
186
  {
187
  "epoch": 0.9585253456221198,
188
+ "grad_norm": 0.09795283307056235,
189
  "learning_rate": 0.0003,
190
+ "loss": 0.6859,
191
  "step": 26
192
  },
193
  {
194
  "epoch": 0.9953917050691244,
195
+ "grad_norm": 0.10408097730031732,
196
  "learning_rate": 0.0003,
197
+ "loss": 0.6844,
198
  "step": 27
199
  },
200
  {
201
  "epoch": 0.9953917050691244,
202
  "step": 27,
203
  "total_flos": 5728527974400.0,
204
+ "train_loss": 0.8656142420238919,
205
+ "train_runtime": 448.8389,
206
+ "train_samples_per_second": 1.934,
207
+ "train_steps_per_second": 0.06
208
  }
209
  ],
210
  "logging_steps": 1.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b57c4122ef14a59ab2c829b81e181b998d7469d4fea10759be8a6da34dc0f4
3
  size 7480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28a9ab02716a3e49f85fb4031db8b670ed5bcba6d2cefceaf7c7bea7ce10720b
3
  size 7480