hugodk-sch
committed on
Commit
•
564490c
1
Parent(s):
2511128
Model save
Browse files
- README.md +13 -16
- adapter_model.safetensors +1 -1
- all_results.json +2 -15
- train_results.json +2 -2
- trainer_state.json +173 -173
README.md
CHANGED
@@ -1,13 +1,10 @@
|
|
1 |
---
|
2 |
library_name: peft
|
3 |
tags:
|
4 |
-
- alignment-handbook
|
5 |
- trl
|
6 |
- dpo
|
7 |
- generated_from_trainer
|
8 |
base_model: NbAiLab/nb-gpt-j-6B-v2
|
9 |
-
datasets:
|
10 |
-
- hugodk-sch/aftonposten_title_prefs
|
11 |
model-index:
|
12 |
- name: aftonposten-6b-align-scan
|
13 |
results: []
|
@@ -18,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
# aftonposten-6b-align-scan
|
20 |
|
21 |
-
This model is a fine-tuned version of [
|
22 |
It achieves the following results on the evaluation set:
|
23 |
-
- Loss: 0.
|
24 |
-
- Rewards/chosen: 0.
|
25 |
-
- Rewards/rejected: 0.
|
26 |
-
- Rewards/accuracies: 0.
|
27 |
-
- Rewards/margins: 0.
|
28 |
-
- Logps/rejected: -37.
|
29 |
-
- Logps/chosen: -34.
|
30 |
-
- Logits/rejected: -2.
|
31 |
-
- Logits/chosen: -2.
|
32 |
|
33 |
## Model description
|
34 |
|
@@ -63,9 +60,9 @@ The following hyperparameters were used during training:
|
|
63 |
|
64 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
65 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
66 |
-
| 1.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
|
70 |
|
71 |
### Framework versions
|
|
|
1 |
---
|
2 |
library_name: peft
|
3 |
tags:
|
|
|
4 |
- trl
|
5 |
- dpo
|
6 |
- generated_from_trainer
|
7 |
base_model: NbAiLab/nb-gpt-j-6B-v2
|
|
|
|
|
8 |
model-index:
|
9 |
- name: aftonposten-6b-align-scan
|
10 |
results: []
|
|
|
15 |
|
16 |
# aftonposten-6b-align-scan
|
17 |
|
18 |
+
This model is a fine-tuned version of [NbAiLab/nb-gpt-j-6B-v2](https://huggingface.co/NbAiLab/nb-gpt-j-6B-v2) on an unknown dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 0.9999
|
21 |
+
- Rewards/chosen: 0.0034
|
22 |
+
- Rewards/rejected: 0.0033
|
23 |
+
- Rewards/accuracies: 0.4904
|
24 |
+
- Rewards/margins: 0.0001
|
25 |
+
- Logps/rejected: -37.5002
|
26 |
+
- Logps/chosen: -34.0177
|
27 |
+
- Logits/rejected: -2.2386
|
28 |
+
- Logits/chosen: -2.2435
|
29 |
|
30 |
## Model description
|
31 |
|
|
|
60 |
|
61 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
62 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
63 |
+
| 1.0006 | 0.26 | 100 | 1.0016 | 0.0023 | 0.0039 | 0.4983 | -0.0016 | -37.4972 | -34.0232 | -2.2384 | -2.2433 |
|
64 |
+
| 0.9981 | 0.52 | 200 | 0.9966 | 0.0032 | -0.0002 | 0.5328 | 0.0033 | -37.5175 | -34.0187 | -2.2389 | -2.2438 |
|
65 |
+
| 0.9944 | 0.78 | 300 | 0.9999 | 0.0034 | 0.0033 | 0.4904 | 0.0001 | -37.5002 | -34.0177 | -2.2386 | -2.2435 |
|
66 |
|
67 |
|
68 |
### Framework versions
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 176183216
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:726700f20fcb28f7858cd1aa59e0cb2bd53a0fe4b7d2b662333531d42f0321bf
|
3 |
size 176183216
|
all_results.json
CHANGED
@@ -1,20 +1,7 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"eval_logps/chosen": -34.0224494934082,
|
6 |
-
"eval_logps/rejected": -37.51002883911133,
|
7 |
-
"eval_loss": 0.9995055198669434,
|
8 |
-
"eval_rewards/accuracies": 0.5141196250915527,
|
9 |
-
"eval_rewards/chosen": 0.0012102578766644,
|
10 |
-
"eval_rewards/margins": 0.0005511684576049447,
|
11 |
-
"eval_rewards/rejected": 0.0006590897683054209,
|
12 |
-
"eval_runtime": 145.6584,
|
13 |
-
"eval_samples": 343,
|
14 |
-
"eval_samples_per_second": 2.355,
|
15 |
-
"eval_steps_per_second": 0.295,
|
16 |
-
"train_loss": 0.9991211135666092,
|
17 |
-
"train_runtime": 3253.9902,
|
18 |
"train_samples": 3079,
|
19 |
"train_samples_per_second": 0.946,
|
20 |
"train_steps_per_second": 0.118
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.998242255619594,
|
4 |
+
"train_runtime": 3253.3829,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"train_samples": 3079,
|
6 |
"train_samples_per_second": 0.946,
|
7 |
"train_steps_per_second": 0.118
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime": 3253.
|
5 |
"train_samples": 3079,
|
6 |
"train_samples_per_second": 0.946,
|
7 |
"train_steps_per_second": 0.118
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.998242255619594,
|
4 |
+
"train_runtime": 3253.3829,
|
5 |
"train_samples": 3079,
|
6 |
"train_samples_per_second": 0.946,
|
7 |
"train_steps_per_second": 0.118
|
trainer_state.json
CHANGED
@@ -29,11 +29,11 @@
|
|
29 |
"logits/rejected": -1.8703795671463013,
|
30 |
"logps/chosen": -36.988380432128906,
|
31 |
"logps/rejected": -33.66728210449219,
|
32 |
-
"loss": 0.
|
33 |
"rewards/accuracies": 0.5694444179534912,
|
34 |
-
"rewards/chosen": 0.
|
35 |
-
"rewards/margins": 0.
|
36 |
-
"rewards/rejected": -0.
|
37 |
"step": 10
|
38 |
},
|
39 |
{
|
@@ -43,11 +43,11 @@
|
|
43 |
"logits/rejected": -1.999983549118042,
|
44 |
"logps/chosen": -29.625896453857422,
|
45 |
"logps/rejected": -29.035802841186523,
|
46 |
-
"loss": 1.
|
47 |
"rewards/accuracies": 0.4375,
|
48 |
-
"rewards/chosen": 0.
|
49 |
-
"rewards/margins": -0.
|
50 |
-
"rewards/rejected": 0.
|
51 |
"step": 20
|
52 |
},
|
53 |
{
|
@@ -57,11 +57,11 @@
|
|
57 |
"logits/rejected": -1.917249321937561,
|
58 |
"logps/chosen": -31.421478271484375,
|
59 |
"logps/rejected": -33.2115364074707,
|
60 |
-
"loss": 1.
|
61 |
"rewards/accuracies": 0.4625000059604645,
|
62 |
-
"rewards/chosen": -0.
|
63 |
-
"rewards/margins": -0.
|
64 |
-
"rewards/rejected": 0.
|
65 |
"step": 30
|
66 |
},
|
67 |
{
|
@@ -71,11 +71,11 @@
|
|
71 |
"logits/rejected": -2.008178949356079,
|
72 |
"logps/chosen": -32.59435272216797,
|
73 |
"logps/rejected": -32.49193572998047,
|
74 |
-
"loss": 1.
|
75 |
"rewards/accuracies": 0.44999998807907104,
|
76 |
-
"rewards/chosen": -0.
|
77 |
-
"rewards/margins": -0.
|
78 |
-
"rewards/rejected": 0.
|
79 |
"step": 40
|
80 |
},
|
81 |
{
|
@@ -85,11 +85,11 @@
|
|
85 |
"logits/rejected": -1.8537908792495728,
|
86 |
"logps/chosen": -33.56566619873047,
|
87 |
"logps/rejected": -35.423240661621094,
|
88 |
-
"loss": 1.
|
89 |
"rewards/accuracies": 0.4375,
|
90 |
-
"rewards/chosen": -0.
|
91 |
-
"rewards/margins": -0.
|
92 |
-
"rewards/rejected": 0.
|
93 |
"step": 50
|
94 |
},
|
95 |
{
|
@@ -99,11 +99,11 @@
|
|
99 |
"logits/rejected": -1.9468472003936768,
|
100 |
"logps/chosen": -32.59955596923828,
|
101 |
"logps/rejected": -33.1828498840332,
|
102 |
-
"loss": 0.
|
103 |
"rewards/accuracies": 0.512499988079071,
|
104 |
-
"rewards/chosen":
|
105 |
-
"rewards/margins": 0.
|
106 |
-
"rewards/rejected": -0.
|
107 |
"step": 60
|
108 |
},
|
109 |
{
|
@@ -113,11 +113,11 @@
|
|
113 |
"logits/rejected": -2.084862232208252,
|
114 |
"logps/chosen": -33.98878860473633,
|
115 |
"logps/rejected": -36.574462890625,
|
116 |
-
"loss": 0.
|
117 |
"rewards/accuracies": 0.512499988079071,
|
118 |
-
"rewards/chosen": -
|
119 |
-
"rewards/margins": 0.
|
120 |
-
"rewards/rejected": -0.
|
121 |
"step": 70
|
122 |
},
|
123 |
{
|
@@ -127,11 +127,11 @@
|
|
127 |
"logits/rejected": -1.9457191228866577,
|
128 |
"logps/chosen": -34.40068054199219,
|
129 |
"logps/rejected": -34.5762939453125,
|
130 |
-
"loss": 0.
|
131 |
"rewards/accuracies": 0.5249999761581421,
|
132 |
-
"rewards/chosen": 0.
|
133 |
-
"rewards/margins": 0.
|
134 |
-
"rewards/rejected": 0.
|
135 |
"step": 80
|
136 |
},
|
137 |
{
|
@@ -141,11 +141,11 @@
|
|
141 |
"logits/rejected": -1.9552500247955322,
|
142 |
"logps/chosen": -32.460357666015625,
|
143 |
"logps/rejected": -32.354434967041016,
|
144 |
-
"loss": 0.
|
145 |
"rewards/accuracies": 0.512499988079071,
|
146 |
-
"rewards/chosen": 0.
|
147 |
-
"rewards/margins": 0.
|
148 |
-
"rewards/rejected": 0.
|
149 |
"step": 90
|
150 |
},
|
151 |
{
|
@@ -155,11 +155,11 @@
|
|
155 |
"logits/rejected": -2.0472888946533203,
|
156 |
"logps/chosen": -32.23810958862305,
|
157 |
"logps/rejected": -31.260278701782227,
|
158 |
-
"loss": 1.
|
159 |
"rewards/accuracies": 0.4625000059604645,
|
160 |
-
"rewards/chosen": 0.
|
161 |
-
"rewards/margins": -0.
|
162 |
-
"rewards/rejected": 0.
|
163 |
"step": 100
|
164 |
},
|
165 |
{
|
@@ -168,14 +168,14 @@
|
|
168 |
"eval_logits/rejected": -2.238436222076416,
|
169 |
"eval_logps/chosen": -34.023216247558594,
|
170 |
"eval_logps/rejected": -37.49723434448242,
|
171 |
-
"eval_loss": 1.
|
172 |
"eval_rewards/accuracies": 0.49833887815475464,
|
173 |
-
"eval_rewards/chosen": 0.
|
174 |
-
"eval_rewards/margins": -0.
|
175 |
-
"eval_rewards/rejected": 0.
|
176 |
-
"eval_runtime":
|
177 |
-
"eval_samples_per_second": 2.
|
178 |
-
"eval_steps_per_second": 0.
|
179 |
"step": 100
|
180 |
},
|
181 |
{
|
@@ -185,11 +185,11 @@
|
|
185 |
"logits/rejected": -2.0027499198913574,
|
186 |
"logps/chosen": -33.2365837097168,
|
187 |
"logps/rejected": -34.01953125,
|
188 |
-
"loss": 1.
|
189 |
"rewards/accuracies": 0.4625000059604645,
|
190 |
-
"rewards/chosen": 0.
|
191 |
-
"rewards/margins": -0.
|
192 |
-
"rewards/rejected": 0.
|
193 |
"step": 110
|
194 |
},
|
195 |
{
|
@@ -199,11 +199,11 @@
|
|
199 |
"logits/rejected": -2.0083022117614746,
|
200 |
"logps/chosen": -32.457847595214844,
|
201 |
"logps/rejected": -32.18357467651367,
|
202 |
-
"loss": 0.
|
203 |
"rewards/accuracies": 0.550000011920929,
|
204 |
-
"rewards/chosen": -0.
|
205 |
-
"rewards/margins": 0.
|
206 |
-
"rewards/rejected": -0.
|
207 |
"step": 120
|
208 |
},
|
209 |
{
|
@@ -213,11 +213,11 @@
|
|
213 |
"logits/rejected": -2.038203001022339,
|
214 |
"logps/chosen": -30.475027084350586,
|
215 |
"logps/rejected": -32.046302795410156,
|
216 |
-
"loss": 1.
|
217 |
"rewards/accuracies": 0.48750001192092896,
|
218 |
-
"rewards/chosen":
|
219 |
-
"rewards/margins": -0.
|
220 |
-
"rewards/rejected": 0.
|
221 |
"step": 130
|
222 |
},
|
223 |
{
|
@@ -227,11 +227,11 @@
|
|
227 |
"logits/rejected": -1.9872560501098633,
|
228 |
"logps/chosen": -31.407278060913086,
|
229 |
"logps/rejected": -32.543296813964844,
|
230 |
-
"loss": 0.
|
231 |
"rewards/accuracies": 0.48750001192092896,
|
232 |
-
"rewards/chosen": 0.
|
233 |
-
"rewards/margins": 0.
|
234 |
-
"rewards/rejected": -0.
|
235 |
"step": 140
|
236 |
},
|
237 |
{
|
@@ -241,11 +241,11 @@
|
|
241 |
"logits/rejected": -1.891632080078125,
|
242 |
"logps/chosen": -34.20501708984375,
|
243 |
"logps/rejected": -34.77235412597656,
|
244 |
-
"loss": 0.
|
245 |
"rewards/accuracies": 0.4625000059604645,
|
246 |
-
"rewards/chosen": -0.
|
247 |
-
"rewards/margins": 0.
|
248 |
-
"rewards/rejected": -0.
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
@@ -255,11 +255,11 @@
|
|
255 |
"logits/rejected": -1.939327597618103,
|
256 |
"logps/chosen": -36.144107818603516,
|
257 |
"logps/rejected": -32.72822570800781,
|
258 |
-
"loss": 0.
|
259 |
"rewards/accuracies": 0.5375000238418579,
|
260 |
-
"rewards/chosen": 0.
|
261 |
-
"rewards/margins": 0.
|
262 |
-
"rewards/rejected": -0.
|
263 |
"step": 160
|
264 |
},
|
265 |
{
|
@@ -269,11 +269,11 @@
|
|
269 |
"logits/rejected": -2.0346803665161133,
|
270 |
"logps/chosen": -33.771934509277344,
|
271 |
"logps/rejected": -31.371145248413086,
|
272 |
-
"loss": 0.
|
273 |
"rewards/accuracies": 0.5375000238418579,
|
274 |
-
"rewards/chosen": 0.
|
275 |
-
"rewards/margins": 0.
|
276 |
-
"rewards/rejected": -0.
|
277 |
"step": 170
|
278 |
},
|
279 |
{
|
@@ -283,11 +283,11 @@
|
|
283 |
"logits/rejected": -2.0528526306152344,
|
284 |
"logps/chosen": -32.524593353271484,
|
285 |
"logps/rejected": -32.510643005371094,
|
286 |
-
"loss": 0.
|
287 |
"rewards/accuracies": 0.550000011920929,
|
288 |
-
"rewards/chosen": 0.
|
289 |
-
"rewards/margins": 0.
|
290 |
-
"rewards/rejected": -0.
|
291 |
"step": 180
|
292 |
},
|
293 |
{
|
@@ -297,11 +297,11 @@
|
|
297 |
"logits/rejected": -2.0457024574279785,
|
298 |
"logps/chosen": -31.492746353149414,
|
299 |
"logps/rejected": -31.319293975830078,
|
300 |
-
"loss": 1.
|
301 |
"rewards/accuracies": 0.5,
|
302 |
-
"rewards/chosen": -0.
|
303 |
-
"rewards/margins": -0.
|
304 |
-
"rewards/rejected": -0.
|
305 |
"step": 190
|
306 |
},
|
307 |
{
|
@@ -311,11 +311,11 @@
|
|
311 |
"logits/rejected": -1.9232347011566162,
|
312 |
"logps/chosen": -31.5926513671875,
|
313 |
"logps/rejected": -32.78697204589844,
|
314 |
-
"loss": 0.
|
315 |
"rewards/accuracies": 0.5874999761581421,
|
316 |
-
"rewards/chosen": 0.
|
317 |
-
"rewards/margins": 0.
|
318 |
-
"rewards/rejected": 0.
|
319 |
"step": 200
|
320 |
},
|
321 |
{
|
@@ -324,13 +324,13 @@
|
|
324 |
"eval_logits/rejected": -2.2389235496520996,
|
325 |
"eval_logps/chosen": -34.018714904785156,
|
326 |
"eval_logps/rejected": -37.517478942871094,
|
327 |
-
"eval_loss": 0.
|
328 |
"eval_rewards/accuracies": 0.5328072905540466,
|
329 |
-
"eval_rewards/chosen": 0.
|
330 |
-
"eval_rewards/margins": 0.
|
331 |
-
"eval_rewards/rejected": -
|
332 |
-
"eval_runtime": 145.
|
333 |
-
"eval_samples_per_second": 2.
|
334 |
"eval_steps_per_second": 0.295,
|
335 |
"step": 200
|
336 |
},
|
@@ -341,11 +341,11 @@
|
|
341 |
"logits/rejected": -2.042539596557617,
|
342 |
"logps/chosen": -31.948400497436523,
|
343 |
"logps/rejected": -33.86983871459961,
|
344 |
-
"loss": 0.
|
345 |
"rewards/accuracies": 0.5,
|
346 |
-
"rewards/chosen": 0.
|
347 |
-
"rewards/margins": 0.
|
348 |
-
"rewards/rejected": -0.
|
349 |
"step": 210
|
350 |
},
|
351 |
{
|
@@ -355,11 +355,11 @@
|
|
355 |
"logits/rejected": -1.9399843215942383,
|
356 |
"logps/chosen": -30.099853515625,
|
357 |
"logps/rejected": -31.55409812927246,
|
358 |
-
"loss": 0.
|
359 |
"rewards/accuracies": 0.5249999761581421,
|
360 |
-
"rewards/chosen": 0.
|
361 |
-
"rewards/margins": 0.
|
362 |
-
"rewards/rejected": 0.
|
363 |
"step": 220
|
364 |
},
|
365 |
{
|
@@ -369,11 +369,11 @@
|
|
369 |
"logits/rejected": -1.9875112771987915,
|
370 |
"logps/chosen": -33.387638092041016,
|
371 |
"logps/rejected": -31.554845809936523,
|
372 |
-
"loss": 0.
|
373 |
"rewards/accuracies": 0.550000011920929,
|
374 |
-
"rewards/chosen": 0.
|
375 |
-
"rewards/margins": 0.
|
376 |
-
"rewards/rejected": -0.
|
377 |
"step": 230
|
378 |
},
|
379 |
{
|
@@ -383,11 +383,11 @@
|
|
383 |
"logits/rejected": -1.9607274532318115,
|
384 |
"logps/chosen": -34.158443450927734,
|
385 |
"logps/rejected": -34.963783264160156,
|
386 |
-
"loss": 0.
|
387 |
"rewards/accuracies": 0.512499988079071,
|
388 |
-
"rewards/chosen": -0.
|
389 |
-
"rewards/margins": 0.
|
390 |
-
"rewards/rejected": -0.
|
391 |
"step": 240
|
392 |
},
|
393 |
{
|
@@ -397,11 +397,11 @@
|
|
397 |
"logits/rejected": -2.0210862159729004,
|
398 |
"logps/chosen": -32.9254035949707,
|
399 |
"logps/rejected": -36.251712799072266,
|
400 |
-
"loss": 0.
|
401 |
"rewards/accuracies": 0.5375000238418579,
|
402 |
-
"rewards/chosen": -
|
403 |
-
"rewards/margins": 0.
|
404 |
-
"rewards/rejected": -0.
|
405 |
"step": 250
|
406 |
},
|
407 |
{
|
@@ -411,11 +411,11 @@
|
|
411 |
"logits/rejected": -1.8886839151382446,
|
412 |
"logps/chosen": -34.194557189941406,
|
413 |
"logps/rejected": -35.51445770263672,
|
414 |
-
"loss": 0.
|
415 |
"rewards/accuracies": 0.48750001192092896,
|
416 |
-
"rewards/chosen": -0.
|
417 |
-
"rewards/margins": 0.
|
418 |
-
"rewards/rejected": -0.
|
419 |
"step": 260
|
420 |
},
|
421 |
{
|
@@ -425,11 +425,11 @@
|
|
425 |
"logits/rejected": -1.8734045028686523,
|
426 |
"logps/chosen": -34.40558624267578,
|
427 |
"logps/rejected": -31.752349853515625,
|
428 |
-
"loss": 1.
|
429 |
"rewards/accuracies": 0.4625000059604645,
|
430 |
-
"rewards/chosen": -0.
|
431 |
-
"rewards/margins": -0.
|
432 |
-
"rewards/rejected": 0.
|
433 |
"step": 270
|
434 |
},
|
435 |
{
|
@@ -439,11 +439,11 @@
|
|
439 |
"logits/rejected": -1.9693737030029297,
|
440 |
"logps/chosen": -35.33230209350586,
|
441 |
"logps/rejected": -31.845691680908203,
|
442 |
-
"loss": 0.
|
443 |
"rewards/accuracies": 0.512499988079071,
|
444 |
-
"rewards/chosen": 0.
|
445 |
-
"rewards/margins": 0.
|
446 |
-
"rewards/rejected": -0.
|
447 |
"step": 280
|
448 |
},
|
449 |
{
|
@@ -453,11 +453,11 @@
|
|
453 |
"logits/rejected": -2.060606002807617,
|
454 |
"logps/chosen": -30.907390594482422,
|
455 |
"logps/rejected": -32.64055252075195,
|
456 |
-
"loss": 0.
|
457 |
"rewards/accuracies": 0.550000011920929,
|
458 |
-
"rewards/chosen": 0.
|
459 |
-
"rewards/margins": 0.
|
460 |
-
"rewards/rejected": 0.
|
461 |
"step": 290
|
462 |
},
|
463 |
{
|
@@ -467,11 +467,11 @@
|
|
467 |
"logits/rejected": -1.9440828561782837,
|
468 |
"logps/chosen": -32.894561767578125,
|
469 |
"logps/rejected": -30.812387466430664,
|
470 |
-
"loss": 0.
|
471 |
"rewards/accuracies": 0.5,
|
472 |
-
"rewards/chosen": 0.
|
473 |
-
"rewards/margins": 0.
|
474 |
-
"rewards/rejected": 0.
|
475 |
"step": 300
|
476 |
},
|
477 |
{
|
@@ -480,13 +480,13 @@
|
|
480 |
"eval_logits/rejected": -2.2386035919189453,
|
481 |
"eval_logps/chosen": -34.017669677734375,
|
482 |
"eval_logps/rejected": -37.50018310546875,
|
483 |
-
"eval_loss": 0.
|
484 |
"eval_rewards/accuracies": 0.490448534488678,
|
485 |
-
"eval_rewards/chosen": 0.
|
486 |
-
"eval_rewards/margins":
|
487 |
-
"eval_rewards/rejected": 0.
|
488 |
-
"eval_runtime": 145.
|
489 |
-
"eval_samples_per_second": 2.
|
490 |
"eval_steps_per_second": 0.295,
|
491 |
"step": 300
|
492 |
},
|
@@ -497,11 +497,11 @@
|
|
497 |
"logits/rejected": -1.925451636314392,
|
498 |
"logps/chosen": -31.603496551513672,
|
499 |
"logps/rejected": -33.734046936035156,
|
500 |
-
"loss": 1.
|
501 |
"rewards/accuracies": 0.4749999940395355,
|
502 |
-
"rewards/chosen": -
|
503 |
-
"rewards/margins": -0.
|
504 |
-
"rewards/rejected": 0.
|
505 |
"step": 310
|
506 |
},
|
507 |
{
|
@@ -511,11 +511,11 @@
|
|
511 |
"logits/rejected": -1.9679291248321533,
|
512 |
"logps/chosen": -34.585323333740234,
|
513 |
"logps/rejected": -33.57084274291992,
|
514 |
-
"loss": 0.
|
515 |
"rewards/accuracies": 0.5375000238418579,
|
516 |
-
"rewards/chosen": 0.
|
517 |
-
"rewards/margins": 0.
|
518 |
-
"rewards/rejected": -0.
|
519 |
"step": 320
|
520 |
},
|
521 |
{
|
@@ -525,11 +525,11 @@
|
|
525 |
"logits/rejected": -2.014427661895752,
|
526 |
"logps/chosen": -33.49116516113281,
|
527 |
"logps/rejected": -32.47978973388672,
|
528 |
-
"loss": 1.
|
529 |
"rewards/accuracies": 0.4375,
|
530 |
-
"rewards/chosen": -0.
|
531 |
-
"rewards/margins": -0.
|
532 |
-
"rewards/rejected": -
|
533 |
"step": 330
|
534 |
},
|
535 |
{
|
@@ -539,11 +539,11 @@
|
|
539 |
"logits/rejected": -2.0872654914855957,
|
540 |
"logps/chosen": -34.18492889404297,
|
541 |
"logps/rejected": -33.08319854736328,
|
542 |
-
"loss": 1.
|
543 |
"rewards/accuracies": 0.42500001192092896,
|
544 |
-
"rewards/chosen": -0.
|
545 |
-
"rewards/margins": -0.
|
546 |
-
"rewards/rejected": 0.
|
547 |
"step": 340
|
548 |
},
|
549 |
{
|
@@ -553,11 +553,11 @@
|
|
553 |
"logits/rejected": -1.9736032485961914,
|
554 |
"logps/chosen": -33.23271942138672,
|
555 |
"logps/rejected": -32.4765510559082,
|
556 |
-
"loss": 0.
|
557 |
"rewards/accuracies": 0.612500011920929,
|
558 |
-
"rewards/chosen": 0.
|
559 |
-
"rewards/margins": 0.
|
560 |
-
"rewards/rejected": -0.
|
561 |
"step": 350
|
562 |
},
|
563 |
{
|
@@ -567,11 +567,11 @@
|
|
567 |
"logits/rejected": -1.940913438796997,
|
568 |
"logps/chosen": -32.22040939331055,
|
569 |
"logps/rejected": -35.28728103637695,
|
570 |
-
"loss": 1.
|
571 |
"rewards/accuracies": 0.48750001192092896,
|
572 |
-
"rewards/chosen": -0.
|
573 |
-
"rewards/margins": -0.
|
574 |
-
"rewards/rejected": 0.
|
575 |
"step": 360
|
576 |
},
|
577 |
{
|
@@ -581,11 +581,11 @@
|
|
581 |
"logits/rejected": -2.0630898475646973,
|
582 |
"logps/chosen": -33.63695526123047,
|
583 |
"logps/rejected": -29.226470947265625,
|
584 |
-
"loss": 0.
|
585 |
"rewards/accuracies": 0.5625,
|
586 |
-
"rewards/chosen": 0.
|
587 |
-
"rewards/margins": 0.
|
588 |
-
"rewards/rejected": -
|
589 |
"step": 370
|
590 |
},
|
591 |
{
|
@@ -595,19 +595,19 @@
|
|
595 |
"logits/rejected": -1.9310123920440674,
|
596 |
"logps/chosen": -34.243560791015625,
|
597 |
"logps/rejected": -30.892742156982422,
|
598 |
-
"loss": 0.
|
599 |
"rewards/accuracies": 0.5249999761581421,
|
600 |
-
"rewards/chosen": -0.
|
601 |
-
"rewards/margins": 0.
|
602 |
-
"rewards/rejected": -0.
|
603 |
"step": 380
|
604 |
},
|
605 |
{
|
606 |
"epoch": 1.0,
|
607 |
"step": 385,
|
608 |
"total_flos": 0.0,
|
609 |
-
"train_loss": 0.
|
610 |
-
"train_runtime": 3253.
|
611 |
"train_samples_per_second": 0.946,
|
612 |
"train_steps_per_second": 0.118
|
613 |
}
|
|
|
29 |
"logits/rejected": -1.8703795671463013,
|
30 |
"logps/chosen": -36.988380432128906,
|
31 |
"logps/rejected": -33.66728210449219,
|
32 |
+
"loss": 0.9889,
|
33 |
"rewards/accuracies": 0.5694444179534912,
|
34 |
+
"rewards/chosen": 0.0036439618561416864,
|
35 |
+
"rewards/margins": 0.01108560897409916,
|
36 |
+
"rewards/rejected": -0.007441645488142967,
|
37 |
"step": 10
|
38 |
},
|
39 |
{
|
|
|
43 |
"logits/rejected": -1.999983549118042,
|
44 |
"logps/chosen": -29.625896453857422,
|
45 |
"logps/rejected": -29.035802841186523,
|
46 |
+
"loss": 1.0023,
|
47 |
"rewards/accuracies": 0.4375,
|
48 |
+
"rewards/chosen": 0.0032594085205346346,
|
49 |
+
"rewards/margins": -0.002268626820296049,
|
50 |
+
"rewards/rejected": 0.00552803510800004,
|
51 |
"step": 20
|
52 |
},
|
53 |
{
|
|
|
57 |
"logits/rejected": -1.917249321937561,
|
58 |
"logps/chosen": -31.421478271484375,
|
59 |
"logps/rejected": -33.2115364074707,
|
60 |
+
"loss": 1.0025,
|
61 |
"rewards/accuracies": 0.4625000059604645,
|
62 |
+
"rewards/chosen": -0.0010770887602120638,
|
63 |
+
"rewards/margins": -0.0024619889445602894,
|
64 |
+
"rewards/rejected": 0.001384900533594191,
|
65 |
"step": 30
|
66 |
},
|
67 |
{
|
|
|
71 |
"logits/rejected": -2.008178949356079,
|
72 |
"logps/chosen": -32.59435272216797,
|
73 |
"logps/rejected": -32.49193572998047,
|
74 |
+
"loss": 1.0077,
|
75 |
"rewards/accuracies": 0.44999998807907104,
|
76 |
+
"rewards/chosen": -0.0035086136776953936,
|
77 |
+
"rewards/margins": -0.007655883673578501,
|
78 |
+
"rewards/rejected": 0.004147270228713751,
|
79 |
"step": 40
|
80 |
},
|
81 |
{
|
|
|
85 |
"logits/rejected": -1.8537908792495728,
|
86 |
"logps/chosen": -33.56566619873047,
|
87 |
"logps/rejected": -35.423240661621094,
|
88 |
+
"loss": 1.0075,
|
89 |
"rewards/accuracies": 0.4375,
|
90 |
+
"rewards/chosen": -0.0016377497231587768,
|
91 |
+
"rewards/margins": -0.007458710577338934,
|
92 |
+
"rewards/rejected": 0.005820960737764835,
|
93 |
"step": 50
|
94 |
},
|
95 |
{
|
|
|
99 |
"logits/rejected": -1.9468472003936768,
|
100 |
"logps/chosen": -32.59955596923828,
|
101 |
"logps/rejected": -33.1828498840332,
|
102 |
+
"loss": 0.9976,
|
103 |
"rewards/accuracies": 0.512499988079071,
|
104 |
+
"rewards/chosen": 0.00012556914589367807,
|
105 |
+
"rewards/margins": 0.0024188074748963118,
|
106 |
+
"rewards/rejected": -0.0022932388819754124,
|
107 |
"step": 60
|
108 |
},
|
109 |
{
|
|
|
113 |
"logits/rejected": -2.084862232208252,
|
114 |
"logps/chosen": -33.98878860473633,
|
115 |
"logps/rejected": -36.574462890625,
|
116 |
+
"loss": 0.9989,
|
117 |
"rewards/accuracies": 0.512499988079071,
|
118 |
+
"rewards/chosen": -6.058474536985159e-05,
|
119 |
+
"rewards/margins": 0.0010579143417999148,
|
120 |
+
"rewards/rejected": -0.0011184990871697664,
|
121 |
"step": 70
|
122 |
},
|
123 |
{
|
|
|
127 |
"logits/rejected": -1.9457191228866577,
|
128 |
"logps/chosen": -34.40068054199219,
|
129 |
"logps/rejected": -34.5762939453125,
|
130 |
+
"loss": 0.9978,
|
131 |
"rewards/accuracies": 0.5249999761581421,
|
132 |
+
"rewards/chosen": 0.005425452254712582,
|
133 |
+
"rewards/margins": 0.0022025699727237225,
|
134 |
+
"rewards/rejected": 0.0032228827476501465,
|
135 |
"step": 80
|
136 |
},
|
137 |
{
|
|
|
141 |
"logits/rejected": -1.9552500247955322,
|
142 |
"logps/chosen": -32.460357666015625,
|
143 |
"logps/rejected": -32.354434967041016,
|
144 |
+
"loss": 0.9995,
|
145 |
"rewards/accuracies": 0.512499988079071,
|
146 |
+
"rewards/chosen": 0.0014136198442429304,
|
147 |
+
"rewards/margins": 0.0005185690824873745,
|
148 |
+
"rewards/rejected": 0.0008950509363785386,
|
149 |
"step": 90
|
150 |
},
|
151 |
{
|
|
|
155 |
"logits/rejected": -2.0472888946533203,
|
156 |
"logps/chosen": -32.23810958862305,
|
157 |
"logps/rejected": -31.260278701782227,
|
158 |
+
"loss": 1.0006,
|
159 |
"rewards/accuracies": 0.4625000059604645,
|
160 |
+
"rewards/chosen": 0.0005457091028802097,
|
161 |
+
"rewards/margins": -0.0006409892230294645,
|
162 |
+
"rewards/rejected": 0.0011866979766637087,
|
163 |
"step": 100
|
164 |
},
|
165 |
{
|
|
|
168 |
"eval_logits/rejected": -2.238436222076416,
|
169 |
"eval_logps/chosen": -34.023216247558594,
|
170 |
"eval_logps/rejected": -37.49723434448242,
|
171 |
+
"eval_loss": 1.0016428232192993,
|
172 |
"eval_rewards/accuracies": 0.49833887815475464,
|
173 |
+
"eval_rewards/chosen": 0.0022677299566566944,
|
174 |
+
"eval_rewards/margins": -0.0016096002655103803,
|
175 |
+
"eval_rewards/rejected": 0.0038773303385823965,
|
176 |
+
"eval_runtime": 145.8128,
|
177 |
+
"eval_samples_per_second": 2.352,
|
178 |
+
"eval_steps_per_second": 0.295,
|
179 |
"step": 100
|
180 |
},
|
181 |
{
|
|
|
185 |
"logits/rejected": -2.0027499198913574,
|
186 |
"logps/chosen": -33.2365837097168,
|
187 |
"logps/rejected": -34.01953125,
|
188 |
+
"loss": 1.0005,
|
189 |
"rewards/accuracies": 0.4625000059604645,
|
190 |
+
"rewards/chosen": 0.0014460014645010233,
|
191 |
+
"rewards/margins": -0.0005222518229857087,
|
192 |
+
"rewards/rejected": 0.0019682529382407665,
|
193 |
"step": 110
|
194 |
},
|
195 |
{
|
|
|
199 |
"logits/rejected": -2.0083022117614746,
|
200 |
"logps/chosen": -32.457847595214844,
|
201 |
"logps/rejected": -32.18357467651367,
|
202 |
+
"loss": 0.9982,
|
203 |
"rewards/accuracies": 0.550000011920929,
|
204 |
+
"rewards/chosen": -0.0025884758215397596,
|
205 |
+
"rewards/margins": 0.0018243432277813554,
|
206 |
+
"rewards/rejected": -0.004412819631397724,
|
207 |
"step": 120
|
208 |
},
|
209 |
{
|
|
|
213 |
"logits/rejected": -2.038203001022339,
|
214 |
"logps/chosen": -30.475027084350586,
|
215 |
"logps/rejected": -32.046302795410156,
|
216 |
+
"loss": 1.0012,
|
217 |
"rewards/accuracies": 0.48750001192092896,
|
218 |
+
"rewards/chosen": 0.00013765673793386668,
|
219 |
+
"rewards/margins": -0.001203052932396531,
|
220 |
+
"rewards/rejected": 0.0013407098595052958,
|
221 |
"step": 130
|
222 |
},
|
223 |
{
|
|
|
227 |
"logits/rejected": -1.9872560501098633,
|
228 |
"logps/chosen": -31.407278060913086,
|
229 |
"logps/rejected": -32.543296813964844,
|
230 |
+
"loss": 0.9958,
|
231 |
"rewards/accuracies": 0.48750001192092896,
|
232 |
+
"rewards/chosen": 0.0024768461007624865,
|
233 |
+
"rewards/margins": 0.004160420503467321,
|
234 |
+
"rewards/rejected": -0.0016835747519508004,
|
235 |
"step": 140
|
236 |
},
|
237 |
{
|
|
|
241 |
"logits/rejected": -1.891632080078125,
|
242 |
"logps/chosen": -34.20501708984375,
|
243 |
"logps/rejected": -34.77235412597656,
|
244 |
+
"loss": 0.9994,
|
245 |
"rewards/accuracies": 0.4625000059604645,
|
246 |
+
"rewards/chosen": -0.002465262543410063,
|
247 |
+
"rewards/margins": 0.0005540539277717471,
|
248 |
+
"rewards/rejected": -0.003019316354766488,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
|
|
255 |
"logits/rejected": -1.939327597618103,
|
256 |
"logps/chosen": -36.144107818603516,
|
257 |
"logps/rejected": -32.72822570800781,
|
258 |
+
"loss": 0.9934,
|
259 |
"rewards/accuracies": 0.5375000238418579,
|
260 |
+
"rewards/chosen": 0.005514757242053747,
|
261 |
+
"rewards/margins": 0.006609287112951279,
|
262 |
+
"rewards/rejected": -0.001094528939574957,
|
263 |
"step": 160
|
264 |
},
|
265 |
{
|
|
|
269 |
"logits/rejected": -2.0346803665161133,
|
270 |
"logps/chosen": -33.771934509277344,
|
271 |
"logps/rejected": -31.371145248413086,
|
272 |
+
"loss": 0.991,
|
273 |
"rewards/accuracies": 0.5375000238418579,
|
274 |
+
"rewards/chosen": 0.005186290480196476,
|
275 |
+
"rewards/margins": 0.009045111015439034,
|
276 |
+
"rewards/rejected": -0.0038588200695812702,
|
277 |
"step": 170
|
278 |
},
|
279 |
{
|
|
|
283 |
"logits/rejected": -2.0528526306152344,
|
284 |
"logps/chosen": -32.524593353271484,
|
285 |
"logps/rejected": -32.510643005371094,
|
286 |
+
"loss": 0.9909,
|
287 |
"rewards/accuracies": 0.550000011920929,
|
288 |
+
"rewards/chosen": 0.004038581624627113,
|
289 |
+
"rewards/margins": 0.009138843044638634,
|
290 |
+
"rewards/rejected": -0.005100260488688946,
|
291 |
"step": 180
|
292 |
},
|
293 |
{
|
|
|
297 |
"logits/rejected": -2.0457024574279785,
|
298 |
"logps/chosen": -31.492746353149414,
|
299 |
"logps/rejected": -31.319293975830078,
|
300 |
+
"loss": 1.0005,
|
301 |
"rewards/accuracies": 0.5,
|
302 |
+
"rewards/chosen": -0.0007584737613797188,
|
303 |
+
"rewards/margins": -0.0005425609415397048,
|
304 |
+
"rewards/rejected": -0.0002159134455723688,
|
305 |
"step": 190
|
306 |
},
|
307 |
{
|
|
|
311 |
"logits/rejected": -1.9232347011566162,
|
312 |
"logps/chosen": -31.5926513671875,
|
313 |
"logps/rejected": -32.78697204589844,
|
314 |
+
"loss": 0.9981,
|
315 |
"rewards/accuracies": 0.5874999761581421,
|
316 |
+
"rewards/chosen": 0.00272659445181489,
|
317 |
+
"rewards/margins": 0.0019350949442014098,
|
318 |
+
"rewards/rejected": 0.0007914996822364628,
|
319 |
"step": 200
|
320 |
},
|
321 |
{
|
|
|
324 |
"eval_logits/rejected": -2.2389235496520996,
|
325 |
"eval_logps/chosen": -34.018714904785156,
|
326 |
"eval_logps/rejected": -37.517478942871094,
|
327 |
+
"eval_loss": 0.9966108798980713,
|
328 |
"eval_rewards/accuracies": 0.5328072905540466,
|
329 |
+
"eval_rewards/chosen": 0.0031679810490459204,
|
330 |
+
"eval_rewards/margins": 0.0033402685075998306,
|
331 |
+
"eval_rewards/rejected": -0.00017228761862497777,
|
332 |
+
"eval_runtime": 145.8204,
|
333 |
+
"eval_samples_per_second": 2.352,
|
334 |
"eval_steps_per_second": 0.295,
|
335 |
"step": 200
|
336 |
},
|
|
|
341 |
"logits/rejected": -2.042539596557617,
|
342 |
"logps/chosen": -31.948400497436523,
|
343 |
"logps/rejected": -33.86983871459961,
|
344 |
+
"loss": 0.994,
|
345 |
"rewards/accuracies": 0.5,
|
346 |
+
"rewards/chosen": 0.004580962937325239,
|
347 |
+
"rewards/margins": 0.0060088313184678555,
|
348 |
+
"rewards/rejected": -0.0014278689632192254,
|
349 |
"step": 210
|
350 |
},
|
351 |
{
|
|
|
355 |
"logits/rejected": -1.9399843215942383,
|
356 |
"logps/chosen": -30.099853515625,
|
357 |
"logps/rejected": -31.55409812927246,
|
358 |
+
"loss": 0.9985,
|
359 |
"rewards/accuracies": 0.5249999761581421,
|
360 |
+
"rewards/chosen": 0.0023371793795377016,
|
361 |
+
"rewards/margins": 0.001521837548352778,
|
362 |
+
"rewards/rejected": 0.0008153414237312973,
|
363 |
"step": 220
|
364 |
},
|
365 |
{
|
|
|
369 |
"logits/rejected": -1.9875112771987915,
|
370 |
"logps/chosen": -33.387638092041016,
|
371 |
"logps/rejected": -31.554845809936523,
|
372 |
+
"loss": 0.9929,
|
373 |
"rewards/accuracies": 0.550000011920929,
|
374 |
+
"rewards/chosen": 0.0059667350724339485,
|
375 |
+
"rewards/margins": 0.007074916269630194,
|
376 |
+
"rewards/rejected": -0.0011081816628575325,
|
377 |
"step": 230
|
378 |
},
|
379 |
{
|
|
|
383 |
"logits/rejected": -1.9607274532318115,
|
384 |
"logps/chosen": -34.158443450927734,
|
385 |
"logps/rejected": -34.963783264160156,
|
386 |
+
"loss": 0.9983,
|
387 |
"rewards/accuracies": 0.512499988079071,
|
388 |
+
"rewards/chosen": -0.0009392915526404977,
|
389 |
+
"rewards/margins": 0.0016581962117925286,
|
390 |
+
"rewards/rejected": -0.0025974875316023827,
|
391 |
"step": 240
|
392 |
},
|
393 |
{
|
|
|
397 |
"logits/rejected": -2.0210862159729004,
|
398 |
"logps/chosen": -32.9254035949707,
|
399 |
"logps/rejected": -36.251712799072266,
|
400 |
+
"loss": 0.9947,
|
401 |
"rewards/accuracies": 0.5375000238418579,
|
402 |
+
"rewards/chosen": -6.657392077613622e-05,
|
403 |
+
"rewards/margins": 0.005290796514600515,
|
404 |
+
"rewards/rejected": -0.005357370711863041,
|
405 |
"step": 250
|
406 |
},
|
407 |
{
|
|
|
411 |
"logits/rejected": -1.8886839151382446,
|
412 |
"logps/chosen": -34.194557189941406,
|
413 |
"logps/rejected": -35.51445770263672,
|
414 |
+
"loss": 0.9993,
|
415 |
"rewards/accuracies": 0.48750001192092896,
|
416 |
+
"rewards/chosen": -0.0012316217180341482,
|
417 |
+
"rewards/margins": 0.0007347877835854888,
|
418 |
+
"rewards/rejected": -0.001966409618034959,
|
419 |
"step": 260
|
420 |
},
|
421 |
{
|
|
|
425 |
"logits/rejected": -1.8734045028686523,
|
426 |
"logps/chosen": -34.40558624267578,
|
427 |
"logps/rejected": -31.752349853515625,
|
428 |
+
"loss": 1.0054,
|
429 |
"rewards/accuracies": 0.4625000059604645,
|
430 |
+
"rewards/chosen": -0.0015969609376043081,
|
431 |
+
"rewards/margins": -0.005393522325903177,
|
432 |
+
"rewards/rejected": 0.0037965611554682255,
|
433 |
"step": 270
|
434 |
},
|
435 |
{
|
|
|
439 |
"logits/rejected": -1.9693737030029297,
|
440 |
"logps/chosen": -35.33230209350586,
|
441 |
"logps/rejected": -31.845691680908203,
|
442 |
+
"loss": 0.9942,
|
443 |
"rewards/accuracies": 0.512499988079071,
|
444 |
+
"rewards/chosen": 0.004867845680564642,
|
445 |
+
"rewards/margins": 0.005807613953948021,
|
446 |
+
"rewards/rejected": -0.0009397673420608044,
|
447 |
"step": 280
|
448 |
},
|
449 |
{
|
|
|
453 |
"logits/rejected": -2.060606002807617,
|
454 |
"logps/chosen": -30.907390594482422,
|
455 |
"logps/rejected": -32.64055252075195,
|
456 |
+
"loss": 0.9989,
|
457 |
"rewards/accuracies": 0.550000011920929,
|
458 |
+
"rewards/chosen": 0.00334669416770339,
|
459 |
+
"rewards/margins": 0.0010900094639509916,
|
460 |
+
"rewards/rejected": 0.002256684470921755,
|
461 |
"step": 290
|
462 |
},
|
463 |
{
|
|
|
467 |
"logits/rejected": -1.9440828561782837,
|
468 |
"logps/chosen": -32.894561767578125,
|
469 |
"logps/rejected": -30.812387466430664,
|
470 |
+
"loss": 0.9944,
|
471 |
"rewards/accuracies": 0.5,
|
472 |
+
"rewards/chosen": 0.006510418839752674,
|
473 |
+
"rewards/margins": 0.005625545047223568,
|
474 |
+
"rewards/rejected": 0.0008848730358295143,
|
475 |
"step": 300
|
476 |
},
|
477 |
{
|
|
|
480 |
"eval_logits/rejected": -2.2386035919189453,
|
481 |
"eval_logps/chosen": -34.017669677734375,
|
482 |
"eval_logps/rejected": -37.50018310546875,
|
483 |
+
"eval_loss": 0.9999422430992126,
|
484 |
"eval_rewards/accuracies": 0.490448534488678,
|
485 |
+
"eval_rewards/chosen": 0.003376962151378393,
|
486 |
+
"eval_rewards/margins": 8.94198747118935e-05,
|
487 |
+
"eval_rewards/rejected": 0.00328754261136055,
|
488 |
+
"eval_runtime": 145.8777,
|
489 |
+
"eval_samples_per_second": 2.351,
|
490 |
"eval_steps_per_second": 0.295,
|
491 |
"step": 300
|
492 |
},
|
|
|
497 |
"logits/rejected": -1.925451636314392,
|
498 |
"logps/chosen": -31.603496551513672,
|
499 |
"logps/rejected": -33.734046936035156,
|
500 |
+
"loss": 1.0017,
|
501 |
"rewards/accuracies": 0.4749999940395355,
|
502 |
+
"rewards/chosen": -2.9836966859875247e-05,
|
503 |
+
"rewards/margins": -0.001651174039579928,
|
504 |
+
"rewards/rejected": 0.0016213370254263282,
|
505 |
"step": 310
|
506 |
},
|
507 |
{
|
|
|
511 |
"logits/rejected": -1.9679291248321533,
|
512 |
"logps/chosen": -34.585323333740234,
|
513 |
"logps/rejected": -33.57084274291992,
|
514 |
+
"loss": 0.9914,
|
515 |
"rewards/accuracies": 0.5375000238418579,
|
516 |
+
"rewards/chosen": 0.00215008738450706,
|
517 |
+
"rewards/margins": 0.008550785481929779,
|
518 |
+
"rewards/rejected": -0.006400698330253363,
|
519 |
"step": 320
|
520 |
},
|
521 |
{
|
|
|
525 |
"logits/rejected": -2.014427661895752,
|
526 |
"logps/chosen": -33.49116516113281,
|
527 |
"logps/rejected": -32.47978973388672,
|
528 |
+
"loss": 1.001,
|
529 |
"rewards/accuracies": 0.4375,
|
530 |
+
"rewards/chosen": -0.0010741351870819926,
|
531 |
+
"rewards/margins": -0.0010322926100343466,
|
532 |
+
"rewards/rejected": -4.1842577047646046e-05,
|
533 |
"step": 330
|
534 |
},
|
535 |
{
|
|
|
539 |
"logits/rejected": -2.0872654914855957,
|
540 |
"logps/chosen": -34.18492889404297,
|
541 |
"logps/rejected": -33.08319854736328,
|
542 |
+
"loss": 1.0081,
|
543 |
"rewards/accuracies": 0.42500001192092896,
|
544 |
+
"rewards/chosen": -0.001107137417420745,
|
545 |
+
"rewards/margins": -0.00807467382401228,
|
546 |
+
"rewards/rejected": 0.006967535708099604,
|
547 |
"step": 340
|
548 |
},
|
549 |
{
|
|
|
553 |
"logits/rejected": -1.9736032485961914,
|
554 |
"logps/chosen": -33.23271942138672,
|
555 |
"logps/rejected": -32.4765510559082,
|
556 |
+
"loss": 0.9878,
|
557 |
"rewards/accuracies": 0.612500011920929,
|
558 |
+
"rewards/chosen": 0.00987558625638485,
|
559 |
+
"rewards/margins": 0.012156149372458458,
|
560 |
+
"rewards/rejected": -0.002280563348904252,
|
561 |
"step": 350
|
562 |
},
|
563 |
{
|
|
|
567 |
"logits/rejected": -1.940913438796997,
|
568 |
"logps/chosen": -32.22040939331055,
|
569 |
"logps/rejected": -35.28728103637695,
|
570 |
+
"loss": 1.0034,
|
571 |
"rewards/accuracies": 0.48750001192092896,
|
572 |
+
"rewards/chosen": -0.003144216490909457,
|
573 |
+
"rewards/margins": -0.003439632710069418,
|
574 |
+
"rewards/rejected": 0.00029541627736762166,
|
575 |
"step": 360
|
576 |
},
|
577 |
{
|
|
|
581 |
"logits/rejected": -2.0630898475646973,
|
582 |
"logps/chosen": -33.63695526123047,
|
583 |
"logps/rejected": -29.226470947265625,
|
584 |
+
"loss": 0.997,
|
585 |
"rewards/accuracies": 0.5625,
|
586 |
+
"rewards/chosen": 0.0030023655854165554,
|
587 |
+
"rewards/margins": 0.0030115083791315556,
|
588 |
+
"rewards/rejected": -9.143399438471533e-06,
|
589 |
"step": 370
|
590 |
},
|
591 |
{
|
|
|
595 |
"logits/rejected": -1.9310123920440674,
|
596 |
"logps/chosen": -34.243560791015625,
|
597 |
"logps/rejected": -30.892742156982422,
|
598 |
+
"loss": 0.9969,
|
599 |
"rewards/accuracies": 0.5249999761581421,
|
600 |
+
"rewards/chosen": -0.0009016336989589036,
|
601 |
+
"rewards/margins": 0.0031216249335557222,
|
602 |
+
"rewards/rejected": -0.004023258574306965,
|
603 |
"step": 380
|
604 |
},
|
605 |
{
|
606 |
"epoch": 1.0,
|
607 |
"step": 385,
|
608 |
"total_flos": 0.0,
|
609 |
+
"train_loss": 0.998242255619594,
|
610 |
+
"train_runtime": 3253.3829,
|
611 |
"train_samples_per_second": 0.946,
|
612 |
"train_steps_per_second": 0.118
|
613 |
}
|