chaoweihuang commited on
Commit
5a9b970
1 Parent(s): 499ffe1

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9998706171561651,
3
+ "eval_kl": 0.01532898098230362,
4
+ "eval_logps/chosen": -340.4056263858093,
5
+ "eval_logps/rejected": -394.53469279661016,
6
+ "eval_loss": 0.40464144945144653,
7
+ "eval_rewards/chosen": -0.636441926469824,
8
+ "eval_rewards/margins": 1.5609318031713308,
9
+ "eval_rewards/rejected": -2.197373729641155,
10
+ "eval_runtime": 260.8191,
11
+ "eval_samples": 1846,
12
+ "eval_samples_per_second": 7.078,
13
+ "eval_steps_per_second": 3.539,
14
+ "total_flos": 0.0,
15
+ "train_loss": 0.420091498218955,
16
+ "train_runtime": 6442.4359,
17
+ "train_samples": 15458,
18
+ "train_samples_per_second": 2.399,
19
+ "train_steps_per_second": 0.15
20
+ }
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "experiments/gemma-2b-zephyr-sft/kto-mix-14k-lf-response-f1_0.75-fg",
3
+ "architectures": [
4
+ "GemmaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 106,
9
+ "eos_token_id": 107,
10
+ "head_dim": 256,
11
+ "hidden_act": "gelu",
12
+ "hidden_activation": "gelu_pytorch_tanh",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 16384,
16
+ "max_position_embeddings": 8192,
17
+ "model_type": "gemma",
18
+ "num_attention_heads": 8,
19
+ "num_hidden_layers": 18,
20
+ "num_key_value_heads": 1,
21
+ "pad_token_id": 107,
22
+ "rms_norm_eps": 1e-06,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000.0,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.41.1",
27
+ "use_cache": true,
28
+ "vocab_size": 256000
29
+ }
eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9998706171561651,
3
+ "eval_kl": 0.01532898098230362,
4
+ "eval_logps/chosen": -340.4056263858093,
5
+ "eval_logps/rejected": -394.53469279661016,
6
+ "eval_loss": 0.40464144945144653,
7
+ "eval_rewards/chosen": -0.636441926469824,
8
+ "eval_rewards/margins": 1.5609318031713308,
9
+ "eval_rewards/rejected": -2.197373729641155,
10
+ "eval_runtime": 260.8191,
11
+ "eval_samples": 1846,
12
+ "eval_samples_per_second": 7.078,
13
+ "eval_steps_per_second": 3.539
14
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 106,
4
+ "eos_token_id": 107,
5
+ "pad_token_id": 107,
6
+ "transformers_version": "4.41.1"
7
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4aed628a8fe01eb701d36f1f0c0380fb01a849a2ded5faaf846ce7d483d52e
3
+ size 4945242104
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df65b7cd6e4dfa43d9dfae7f6eaf58a8846de61aa3aefa34bb910f7aaf34f5f6
3
+ size 1115697720
model.safetensors.index.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 6060920832
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00002-of-00002.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
98
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
99
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
101
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
102
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
103
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
105
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
108
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
109
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
110
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
111
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
112
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
113
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
114
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
115
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
116
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
117
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
118
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
119
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
120
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
121
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
122
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
123
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
124
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
125
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
126
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
127
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
128
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
129
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
130
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
131
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
132
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
133
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
134
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
135
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
136
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
137
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
138
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
139
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
140
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
141
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
144
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
145
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
146
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
148
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
149
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
150
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
152
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
153
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
154
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
155
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
156
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
157
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
158
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
159
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
160
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
161
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
162
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
163
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
164
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
165
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
166
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
167
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
168
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
169
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
170
+ "model.norm.weight": "model-00002-of-00002.safetensors"
171
+ }
172
+ }
runs/Jun15_18-00-51_yme9r2ctr1718003783591-jbt67/events.out.tfevents.1718445923.yme9r2ctr1718003783591-jbt67.24351.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f7303497ebd3ebf7cf2023f7848cd627a067ff707662a1c4edcc56b1311f0e6
3
+ size 61635
runs/Jun15_18-00-51_yme9r2ctr1718003783591-jbt67/events.out.tfevents.1718452635.yme9r2ctr1718003783591-jbt67.24351.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e9ce5a98a4e3e4ffe804211ca56753e60fdac3a8b24a404846d13184b2dffec
3
+ size 696
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
20
+ "pad_token": "<|im_end|>",
21
+ "unk_token": {
22
+ "content": "<unk>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ }
28
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322a5f52ab5cab196761ab397a022d6fa3a2e1418585e532bb6efb2fedd2ae94
3
+ size 17477501
tokenizer_config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<eos>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "<bos>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "106": {
38
+ "content": "<|im_start|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "107": {
46
+ "content": "<|im_end|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ }
53
+ },
54
+ "additional_special_tokens": [
55
+ "<|im_start|>",
56
+ "<|im_end|>"
57
+ ],
58
+ "bos_token": "<|im_start|>",
59
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
60
+ "clean_up_tokenization_spaces": false,
61
+ "eos_token": "<|im_end|>",
62
+ "legacy": null,
63
+ "model_max_length": 2048,
64
+ "pad_token": "<|im_end|>",
65
+ "sp_model_kwargs": {},
66
+ "spaces_between_special_tokens": false,
67
+ "tokenizer_class": "GemmaTokenizer",
68
+ "unk_token": "<unk>",
69
+ "use_default_system_prompt": false
70
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9998706171561651,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.420091498218955,
5
+ "train_runtime": 6442.4359,
6
+ "train_samples": 15458,
7
+ "train_samples_per_second": 2.399,
8
+ "train_steps_per_second": 0.15
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,1346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9998706171561651,
5
+ "eval_steps": 200,
6
+ "global_step": 966,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.010350627506792599,
13
+ "grad_norm": 36.896687952176364,
14
+ "kl": 0.006904316134750843,
15
+ "learning_rate": 2.126812117966759e-07,
16
+ "logps/chosen": -417.2011954066265,
17
+ "logps/rejected": -375.1744622564935,
18
+ "loss": 0.4997,
19
+ "rewards/chosen": -0.003417713455407016,
20
+ "rewards/margins": 0.002690252778750596,
21
+ "rewards/rejected": -0.006107966234157612,
22
+ "step": 10
23
+ },
24
+ {
25
+ "epoch": 0.020701255013585197,
26
+ "grad_norm": 36.344980205107255,
27
+ "kl": 0.004612588789314032,
28
+ "learning_rate": 3.096603651432316e-07,
29
+ "logps/chosen": -277.7969021267361,
30
+ "logps/rejected": -407.0654296875,
31
+ "loss": 0.4892,
32
+ "rewards/chosen": -0.04762052165137397,
33
+ "rewards/margins": 0.07216862355819856,
34
+ "rewards/rejected": -0.11978914520957253,
35
+ "step": 20
36
+ },
37
+ {
38
+ "epoch": 0.0310518825203778,
39
+ "grad_norm": 36.71259965236908,
40
+ "kl": 0.0,
41
+ "learning_rate": 3.602235071779947e-07,
42
+ "logps/chosen": -355.01975574712645,
43
+ "logps/rejected": -431.1890785530822,
44
+ "loss": 0.4788,
45
+ "rewards/chosen": -0.2371558485359981,
46
+ "rewards/margins": 0.25850485494016306,
47
+ "rewards/rejected": -0.49566070347616115,
48
+ "step": 30
49
+ },
50
+ {
51
+ "epoch": 0.041402510027170394,
52
+ "grad_norm": 38.99807359143627,
53
+ "kl": 0.016344498842954636,
54
+ "learning_rate": 3.9466076978545386e-07,
55
+ "logps/chosen": -347.33896998355266,
56
+ "logps/rejected": -404.498046875,
57
+ "loss": 0.4398,
58
+ "rewards/chosen": -0.5145087995027241,
59
+ "rewards/margins": 0.5702773885320601,
60
+ "rewards/rejected": -1.0847861880347842,
61
+ "step": 40
62
+ },
63
+ {
64
+ "epoch": 0.051753137533962996,
65
+ "grad_norm": 34.4366373643818,
66
+ "kl": 0.0,
67
+ "learning_rate": 4.208077428062608e-07,
68
+ "logps/chosen": -401.31200610632186,
69
+ "logps/rejected": -408.78579837328766,
70
+ "loss": 0.4906,
71
+ "rewards/chosen": -1.0610687431247754,
72
+ "rewards/margins": 0.6205490982038848,
73
+ "rewards/rejected": -1.6816178413286602,
74
+ "step": 50
75
+ },
76
+ {
77
+ "epoch": 0.0621037650407556,
78
+ "grad_norm": 40.6191164803455,
79
+ "kl": 0.0,
80
+ "learning_rate": 4.4189144263242994e-07,
81
+ "logps/chosen": -292.2215844131098,
82
+ "logps/rejected": -431.56860977564105,
83
+ "loss": 0.4649,
84
+ "rewards/chosen": -0.6987755007860137,
85
+ "rewards/margins": 0.5288097293321754,
86
+ "rewards/rejected": -1.227585230118189,
87
+ "step": 60
88
+ },
89
+ {
90
+ "epoch": 0.0724543925475482,
91
+ "grad_norm": 30.15586760876392,
92
+ "kl": 0.0,
93
+ "learning_rate": 4.5955828020052655e-07,
94
+ "logps/chosen": -355.0156035370879,
95
+ "logps/rejected": -401.8425045289855,
96
+ "loss": 0.4658,
97
+ "rewards/chosen": -0.5742165701729911,
98
+ "rewards/margins": 0.8813798008004576,
99
+ "rewards/rejected": -1.4555963709734487,
100
+ "step": 70
101
+ },
102
+ {
103
+ "epoch": 0.08280502005434079,
104
+ "grad_norm": 28.330817825249255,
105
+ "kl": 0.0,
106
+ "learning_rate": 4.7476282570257156e-07,
107
+ "logps/chosen": -394.8970209478022,
108
+ "logps/rejected": -387.24026268115944,
109
+ "loss": 0.4731,
110
+ "rewards/chosen": -0.7301217383080787,
111
+ "rewards/margins": 0.8021065933268852,
112
+ "rewards/rejected": -1.5322283316349639,
113
+ "step": 80
114
+ },
115
+ {
116
+ "epoch": 0.0931556475611334,
117
+ "grad_norm": 30.67181137678842,
118
+ "kl": 0.0,
119
+ "learning_rate": 4.881082258136016e-07,
120
+ "logps/chosen": -294.80318509615387,
121
+ "logps/rejected": -385.4679163490854,
122
+ "loss": 0.4613,
123
+ "rewards/chosen": -0.6603363232734876,
124
+ "rewards/margins": 0.5270779856001309,
125
+ "rewards/rejected": -1.1874143088736184,
126
+ "step": 90
127
+ },
128
+ {
129
+ "epoch": 0.10350627506792599,
130
+ "grad_norm": 31.541642718713373,
131
+ "kl": 0.0,
132
+ "learning_rate": 5e-07,
133
+ "logps/chosen": -356.5553466796875,
134
+ "logps/rejected": -427.236279296875,
135
+ "loss": 0.4466,
136
+ "rewards/chosen": -0.5922697067260743,
137
+ "rewards/margins": 0.8280625343322754,
138
+ "rewards/rejected": -1.4203322410583497,
139
+ "step": 100
140
+ },
141
+ {
142
+ "epoch": 0.11385690257471859,
143
+ "grad_norm": 28.188607938438196,
144
+ "kl": 0.0,
145
+ "learning_rate": 5e-07,
146
+ "logps/chosen": -349.71470997431504,
147
+ "logps/rejected": -431.29777298850576,
148
+ "loss": 0.445,
149
+ "rewards/chosen": -0.7172038457165025,
150
+ "rewards/margins": 0.5990428885518614,
151
+ "rewards/rejected": -1.316246734268364,
152
+ "step": 110
153
+ },
154
+ {
155
+ "epoch": 0.1242075300815112,
156
+ "grad_norm": 28.777259577988843,
157
+ "kl": 0.0,
158
+ "learning_rate": 5e-07,
159
+ "logps/chosen": -317.6811767578125,
160
+ "logps/rejected": -400.8,
161
+ "loss": 0.4577,
162
+ "rewards/chosen": -0.748396921157837,
163
+ "rewards/margins": 0.6573972225189209,
164
+ "rewards/rejected": -1.4057941436767578,
165
+ "step": 120
166
+ },
167
+ {
168
+ "epoch": 0.13455815758830378,
169
+ "grad_norm": 27.073111094683828,
170
+ "kl": 0.0,
171
+ "learning_rate": 5e-07,
172
+ "logps/chosen": -334.3945529513889,
173
+ "logps/rejected": -424.9839564732143,
174
+ "loss": 0.4654,
175
+ "rewards/chosen": -0.797715589735243,
176
+ "rewards/margins": 0.9449826437329489,
177
+ "rewards/rejected": -1.742698233468192,
178
+ "step": 130
179
+ },
180
+ {
181
+ "epoch": 0.1449087850950964,
182
+ "grad_norm": 29.19719970356803,
183
+ "kl": 0.0,
184
+ "learning_rate": 5e-07,
185
+ "logps/chosen": -368.06354166666665,
186
+ "logps/rejected": -338.69952566964287,
187
+ "loss": 0.487,
188
+ "rewards/chosen": -0.6288536071777344,
189
+ "rewards/margins": 0.4991338457380021,
190
+ "rewards/rejected": -1.1279874529157365,
191
+ "step": 140
192
+ },
193
+ {
194
+ "epoch": 0.155259412601889,
195
+ "grad_norm": 31.37561442050933,
196
+ "kl": 0.0,
197
+ "learning_rate": 5e-07,
198
+ "logps/chosen": -308.6735341061828,
199
+ "logps/rejected": -390.50953241604475,
200
+ "loss": 0.4775,
201
+ "rewards/chosen": -0.5661141180223034,
202
+ "rewards/margins": 0.7382167362512507,
203
+ "rewards/rejected": -1.304330854273554,
204
+ "step": 150
205
+ },
206
+ {
207
+ "epoch": 0.16561004010868158,
208
+ "grad_norm": 36.002916810630985,
209
+ "kl": 0.07424011081457138,
210
+ "learning_rate": 5e-07,
211
+ "logps/chosen": -395.24665850903614,
212
+ "logps/rejected": -402.71707589285717,
213
+ "loss": 0.4599,
214
+ "rewards/chosen": -0.5169859277196678,
215
+ "rewards/margins": 0.6465992892484227,
216
+ "rewards/rejected": -1.1635852169680905,
217
+ "step": 160
218
+ },
219
+ {
220
+ "epoch": 0.1759606676154742,
221
+ "grad_norm": 27.151915007789793,
222
+ "kl": 0.0,
223
+ "learning_rate": 5e-07,
224
+ "logps/chosen": -380.0856370192308,
225
+ "logps/rejected": -437.3331269054878,
226
+ "loss": 0.434,
227
+ "rewards/chosen": -0.5822516710330279,
228
+ "rewards/margins": 0.9586351846739081,
229
+ "rewards/rejected": -1.540886855706936,
230
+ "step": 170
231
+ },
232
+ {
233
+ "epoch": 0.1863112951222668,
234
+ "grad_norm": 22.70085892654007,
235
+ "kl": 0.0,
236
+ "learning_rate": 5e-07,
237
+ "logps/chosen": -352.72755281690144,
238
+ "logps/rejected": -438.0743504213483,
239
+ "loss": 0.4081,
240
+ "rewards/chosen": -0.6838695364938655,
241
+ "rewards/margins": 0.9942577951862609,
242
+ "rewards/rejected": -1.6781273316801264,
243
+ "step": 180
244
+ },
245
+ {
246
+ "epoch": 0.19666192262905938,
247
+ "grad_norm": 27.14667046044915,
248
+ "kl": 0.0,
249
+ "learning_rate": 5e-07,
250
+ "logps/chosen": -320.2009880514706,
251
+ "logps/rejected": -407.538046875,
252
+ "loss": 0.449,
253
+ "rewards/chosen": -1.0842503267176011,
254
+ "rewards/margins": 1.4791047758214615,
255
+ "rewards/rejected": -2.5633551025390626,
256
+ "step": 190
257
+ },
258
+ {
259
+ "epoch": 0.20701255013585199,
260
+ "grad_norm": 27.694967881656005,
261
+ "kl": 0.005686330609023571,
262
+ "learning_rate": 5e-07,
263
+ "logps/chosen": -337.79836856617646,
264
+ "logps/rejected": -427.5978645833333,
265
+ "loss": 0.4522,
266
+ "rewards/chosen": -0.9811132094439339,
267
+ "rewards/margins": 1.1809233302696076,
268
+ "rewards/rejected": -2.1620365397135415,
269
+ "step": 200
270
+ },
271
+ {
272
+ "epoch": 0.20701255013585199,
273
+ "eval_kl": 0.0010393437696620822,
274
+ "eval_logps/chosen": -345.2487181263858,
275
+ "eval_logps/rejected": -393.2139168432203,
276
+ "eval_loss": 0.44461360573768616,
277
+ "eval_rewards/chosen": -1.1207509516612388,
278
+ "eval_rewards/margins": 0.9445489068199584,
279
+ "eval_rewards/rejected": -2.0652998584811972,
280
+ "eval_runtime": 261.4133,
281
+ "eval_samples_per_second": 7.062,
282
+ "eval_steps_per_second": 3.531,
283
+ "step": 200
284
+ },
285
+ {
286
+ "epoch": 0.2173631776426446,
287
+ "grad_norm": 30.464898770807604,
288
+ "kl": 0.0,
289
+ "learning_rate": 5e-07,
290
+ "logps/chosen": -365.0289713541667,
291
+ "logps/rejected": -427.5989879261364,
292
+ "loss": 0.4016,
293
+ "rewards/chosen": -0.8537895944383409,
294
+ "rewards/margins": 1.3811903818689213,
295
+ "rewards/rejected": -2.234979976307262,
296
+ "step": 210
297
+ },
298
+ {
299
+ "epoch": 0.22771380514943718,
300
+ "grad_norm": 23.41466055625897,
301
+ "kl": 0.0,
302
+ "learning_rate": 5e-07,
303
+ "logps/chosen": -345.475933908046,
304
+ "logps/rejected": -393.412189640411,
305
+ "loss": 0.4443,
306
+ "rewards/chosen": -0.9509018645889458,
307
+ "rewards/margins": 1.2836219608605406,
308
+ "rewards/rejected": -2.2345238254494864,
309
+ "step": 220
310
+ },
311
+ {
312
+ "epoch": 0.23806443265622979,
313
+ "grad_norm": 26.98695760593119,
314
+ "kl": 0.0,
315
+ "learning_rate": 5e-07,
316
+ "logps/chosen": -323.52855282738096,
317
+ "logps/rejected": -349.73843544407896,
318
+ "loss": 0.4764,
319
+ "rewards/chosen": -0.9678686232793898,
320
+ "rewards/margins": 0.599584660733254,
321
+ "rewards/rejected": -1.5674532840126438,
322
+ "step": 230
323
+ },
324
+ {
325
+ "epoch": 0.2484150601630224,
326
+ "grad_norm": 26.51688363505412,
327
+ "kl": 0.0,
328
+ "learning_rate": 5e-07,
329
+ "logps/chosen": -339.72511772260276,
330
+ "logps/rejected": -392.6373024425287,
331
+ "loss": 0.4254,
332
+ "rewards/chosen": -0.7914297548058915,
333
+ "rewards/margins": 0.7201490352668456,
334
+ "rewards/rejected": -1.511578790072737,
335
+ "step": 240
336
+ },
337
+ {
338
+ "epoch": 0.258765687669815,
339
+ "grad_norm": 38.3631109147077,
340
+ "kl": 0.0,
341
+ "learning_rate": 5e-07,
342
+ "logps/chosen": -362.05623478084414,
343
+ "logps/rejected": -361.7480233433735,
344
+ "loss": 0.4123,
345
+ "rewards/chosen": -0.5259268129026735,
346
+ "rewards/margins": 1.3802443193803648,
347
+ "rewards/rejected": -1.9061711322830384,
348
+ "step": 250
349
+ },
350
+ {
351
+ "epoch": 0.26911631517660756,
352
+ "grad_norm": 28.783330131851603,
353
+ "kl": 0.0,
354
+ "learning_rate": 5e-07,
355
+ "logps/chosen": -318.40223672945206,
356
+ "logps/rejected": -472.43588362068965,
357
+ "loss": 0.4135,
358
+ "rewards/chosen": -0.8065869579576466,
359
+ "rewards/margins": 1.0660637146918686,
360
+ "rewards/rejected": -1.8726506726495151,
361
+ "step": 260
362
+ },
363
+ {
364
+ "epoch": 0.27946694268340017,
365
+ "grad_norm": 25.596794311830312,
366
+ "kl": 0.0,
367
+ "learning_rate": 5e-07,
368
+ "logps/chosen": -367.7001139322917,
369
+ "logps/rejected": -436.5064808238636,
370
+ "loss": 0.4093,
371
+ "rewards/chosen": -0.8103501001993815,
372
+ "rewards/margins": 1.0771059267448657,
373
+ "rewards/rejected": -1.887456026944247,
374
+ "step": 270
375
+ },
376
+ {
377
+ "epoch": 0.2898175701901928,
378
+ "grad_norm": 31.49395928287787,
379
+ "kl": 0.0206025131046772,
380
+ "learning_rate": 5e-07,
381
+ "logps/chosen": -456.68581081081084,
382
+ "logps/rejected": -399.91547056686045,
383
+ "loss": 0.4345,
384
+ "rewards/chosen": -1.0405741511164486,
385
+ "rewards/margins": 0.9193381711718871,
386
+ "rewards/rejected": -1.9599123222883357,
387
+ "step": 280
388
+ },
389
+ {
390
+ "epoch": 0.3001681976969854,
391
+ "grad_norm": 27.507156588516853,
392
+ "kl": 0.0,
393
+ "learning_rate": 5e-07,
394
+ "logps/chosen": -354.3474633487654,
395
+ "logps/rejected": -430.79647943037975,
396
+ "loss": 0.4246,
397
+ "rewards/chosen": -0.8167637954523534,
398
+ "rewards/margins": 1.6583199540531843,
399
+ "rewards/rejected": -2.4750837495055378,
400
+ "step": 290
401
+ },
402
+ {
403
+ "epoch": 0.310518825203778,
404
+ "grad_norm": 27.243120877089865,
405
+ "kl": 0.0,
406
+ "learning_rate": 5e-07,
407
+ "logps/chosen": -369.0854611280488,
408
+ "logps/rejected": -411.7598407451923,
409
+ "loss": 0.4515,
410
+ "rewards/chosen": -0.9885020372344226,
411
+ "rewards/margins": 1.07946980364849,
412
+ "rewards/rejected": -2.0679718408829126,
413
+ "step": 300
414
+ },
415
+ {
416
+ "epoch": 0.3208694527105706,
417
+ "grad_norm": 26.41738454716243,
418
+ "kl": 0.0,
419
+ "learning_rate": 5e-07,
420
+ "logps/chosen": -328.5754642210145,
421
+ "logps/rejected": -473.32679429945057,
422
+ "loss": 0.4,
423
+ "rewards/chosen": -0.8000211853911912,
424
+ "rewards/margins": 1.4340147676694202,
425
+ "rewards/rejected": -2.2340359530606113,
426
+ "step": 310
427
+ },
428
+ {
429
+ "epoch": 0.33122008021736316,
430
+ "grad_norm": 27.90947815196134,
431
+ "kl": 0.0,
432
+ "learning_rate": 5e-07,
433
+ "logps/chosen": -379.9532833614865,
434
+ "logps/rejected": -414.0909792877907,
435
+ "loss": 0.4206,
436
+ "rewards/chosen": -0.707832078675966,
437
+ "rewards/margins": 1.2218697929741975,
438
+ "rewards/rejected": -1.9297018716501635,
439
+ "step": 320
440
+ },
441
+ {
442
+ "epoch": 0.34157070772415576,
443
+ "grad_norm": 34.323093394556274,
444
+ "kl": 0.0,
445
+ "learning_rate": 5e-07,
446
+ "logps/chosen": -302.920654296875,
447
+ "logps/rejected": -365.2463107638889,
448
+ "loss": 0.4599,
449
+ "rewards/chosen": -0.8200391422618519,
450
+ "rewards/margins": 1.4710271334407303,
451
+ "rewards/rejected": -2.2910662757025824,
452
+ "step": 330
453
+ },
454
+ {
455
+ "epoch": 0.3519213352309484,
456
+ "grad_norm": 35.00194682599148,
457
+ "kl": 0.020750045776367188,
458
+ "learning_rate": 5e-07,
459
+ "logps/chosen": -370.7525414156627,
460
+ "logps/rejected": -407.6445819805195,
461
+ "loss": 0.4401,
462
+ "rewards/chosen": -0.4423764699912933,
463
+ "rewards/margins": 0.795768243571511,
464
+ "rewards/rejected": -1.2381447135628043,
465
+ "step": 340
466
+ },
467
+ {
468
+ "epoch": 0.362271962737741,
469
+ "grad_norm": 28.74449923281838,
470
+ "kl": 0.0,
471
+ "learning_rate": 5e-07,
472
+ "logps/chosen": -369.2329220655488,
473
+ "logps/rejected": -412.28390424679486,
474
+ "loss": 0.443,
475
+ "rewards/chosen": -0.5598751161156631,
476
+ "rewards/margins": 0.7612275152820732,
477
+ "rewards/rejected": -1.3211026313977363,
478
+ "step": 350
479
+ },
480
+ {
481
+ "epoch": 0.3726225902445336,
482
+ "grad_norm": 33.80893504974849,
483
+ "kl": 0.0,
484
+ "learning_rate": 5e-07,
485
+ "logps/chosen": -333.06757269965277,
486
+ "logps/rejected": -476.0582386363636,
487
+ "loss": 0.3942,
488
+ "rewards/chosen": -0.8373040093315972,
489
+ "rewards/margins": 1.5102612081200184,
490
+ "rewards/rejected": -2.3475652174516157,
491
+ "step": 360
492
+ },
493
+ {
494
+ "epoch": 0.3829732177513262,
495
+ "grad_norm": 23.093501234844034,
496
+ "kl": 0.0,
497
+ "learning_rate": 5e-07,
498
+ "logps/chosen": -344.985234375,
499
+ "logps/rejected": -478.50422794117645,
500
+ "loss": 0.3956,
501
+ "rewards/chosen": -1.264248046875,
502
+ "rewards/margins": 1.5773571059283087,
503
+ "rewards/rejected": -2.8416051528033086,
504
+ "step": 370
505
+ },
506
+ {
507
+ "epoch": 0.39332384525811875,
508
+ "grad_norm": 23.6165146626171,
509
+ "kl": 0.0,
510
+ "learning_rate": 5e-07,
511
+ "logps/chosen": -331.19694890202703,
512
+ "logps/rejected": -458.86123728197674,
513
+ "loss": 0.4153,
514
+ "rewards/chosen": -1.1533899049501162,
515
+ "rewards/margins": 1.4770645798563884,
516
+ "rewards/rejected": -2.6304544848065046,
517
+ "step": 380
518
+ },
519
+ {
520
+ "epoch": 0.40367447276491136,
521
+ "grad_norm": 24.677426766885045,
522
+ "kl": 0.045375823974609375,
523
+ "learning_rate": 5e-07,
524
+ "logps/chosen": -335.65542204483694,
525
+ "logps/rejected": -439.2108800551471,
526
+ "loss": 0.4565,
527
+ "rewards/chosen": -0.7980768784232761,
528
+ "rewards/margins": 1.7186397981765629,
529
+ "rewards/rejected": -2.516716676599839,
530
+ "step": 390
531
+ },
532
+ {
533
+ "epoch": 0.41402510027170397,
534
+ "grad_norm": 32.96461257238746,
535
+ "kl": 0.0,
536
+ "learning_rate": 5e-07,
537
+ "logps/chosen": -313.4358512581169,
538
+ "logps/rejected": -439.3407379518072,
539
+ "loss": 0.4056,
540
+ "rewards/chosen": -0.6841482187246347,
541
+ "rewards/margins": 1.2257567208579143,
542
+ "rewards/rejected": -1.909904939582549,
543
+ "step": 400
544
+ },
545
+ {
546
+ "epoch": 0.41402510027170397,
547
+ "eval_kl": 0.02226920612156391,
548
+ "eval_logps/chosen": -340.0967987804878,
549
+ "eval_logps/rejected": -387.47169623940675,
550
+ "eval_loss": 0.44011881947517395,
551
+ "eval_rewards/chosen": -0.6055575284090909,
552
+ "eval_rewards/margins": 0.885514011000999,
553
+ "eval_rewards/rejected": -1.49107153941009,
554
+ "eval_runtime": 260.8826,
555
+ "eval_samples_per_second": 7.076,
556
+ "eval_steps_per_second": 3.538,
557
+ "step": 400
558
+ },
559
+ {
560
+ "epoch": 0.4243757277784966,
561
+ "grad_norm": 26.751958968613145,
562
+ "kl": 0.0,
563
+ "learning_rate": 5e-07,
564
+ "logps/chosen": -372.82060185185185,
565
+ "logps/rejected": -399.93740110759495,
566
+ "loss": 0.4265,
567
+ "rewards/chosen": -0.5553302058467159,
568
+ "rewards/margins": 1.1236735458839013,
569
+ "rewards/rejected": -1.6790037517306171,
570
+ "step": 410
571
+ },
572
+ {
573
+ "epoch": 0.4347263552852892,
574
+ "grad_norm": 32.246235152731096,
575
+ "kl": 0.0,
576
+ "learning_rate": 5e-07,
577
+ "logps/chosen": -348.120418595679,
578
+ "logps/rejected": -402.0041287579114,
579
+ "loss": 0.4239,
580
+ "rewards/chosen": -0.7425044495382427,
581
+ "rewards/margins": 1.1616015940238618,
582
+ "rewards/rejected": -1.9041060435621044,
583
+ "step": 420
584
+ },
585
+ {
586
+ "epoch": 0.44507698279208174,
587
+ "grad_norm": 27.212254824473547,
588
+ "kl": 0.04713239520788193,
589
+ "learning_rate": 5e-07,
590
+ "logps/chosen": -330.6474880642361,
591
+ "logps/rejected": -389.7398792613636,
592
+ "loss": 0.3978,
593
+ "rewards/chosen": -0.9934198591444228,
594
+ "rewards/margins": 1.0819970525876441,
595
+ "rewards/rejected": -2.075416911732067,
596
+ "step": 430
597
+ },
598
+ {
599
+ "epoch": 0.45542761029887435,
600
+ "grad_norm": 24.894169784907362,
601
+ "kl": 0.0,
602
+ "learning_rate": 5e-07,
603
+ "logps/chosen": -376.64564344618054,
604
+ "logps/rejected": -515.4582297585227,
605
+ "loss": 0.3809,
606
+ "rewards/chosen": -0.8414801491631402,
607
+ "rewards/margins": 1.9854850094727796,
608
+ "rewards/rejected": -2.82696515863592,
609
+ "step": 440
610
+ },
611
+ {
612
+ "epoch": 0.46577823780566696,
613
+ "grad_norm": 33.54283688924568,
614
+ "kl": 0.0,
615
+ "learning_rate": 5e-07,
616
+ "logps/chosen": -373.34893120659723,
617
+ "logps/rejected": -440.4869495738636,
618
+ "loss": 0.4141,
619
+ "rewards/chosen": -1.090722295973036,
620
+ "rewards/margins": 1.4239928987291124,
621
+ "rewards/rejected": -2.5147151947021484,
622
+ "step": 450
623
+ },
624
+ {
625
+ "epoch": 0.47612886531245957,
626
+ "grad_norm": 25.873975620632326,
627
+ "kl": 0.0,
628
+ "learning_rate": 5e-07,
629
+ "logps/chosen": -297.38337725903614,
630
+ "logps/rejected": -368.12974330357144,
631
+ "loss": 0.4304,
632
+ "rewards/chosen": -0.642763620399567,
633
+ "rewards/margins": 1.4404461233478962,
634
+ "rewards/rejected": -2.0832097437474633,
635
+ "step": 460
636
+ },
637
+ {
638
+ "epoch": 0.4864794928192522,
639
+ "grad_norm": 31.22528359201901,
640
+ "kl": 0.0,
641
+ "learning_rate": 5e-07,
642
+ "logps/chosen": -364.6297576121795,
643
+ "logps/rejected": -414.5650247713415,
644
+ "loss": 0.397,
645
+ "rewards/chosen": -0.4758866138947316,
646
+ "rewards/margins": 1.4139596296147006,
647
+ "rewards/rejected": -1.889846243509432,
648
+ "step": 470
649
+ },
650
+ {
651
+ "epoch": 0.4968301203260448,
652
+ "grad_norm": 26.938362242757048,
653
+ "kl": 0.0,
654
+ "learning_rate": 5e-07,
655
+ "logps/chosen": -398.0412109375,
656
+ "logps/rejected": -487.195654296875,
657
+ "loss": 0.4055,
658
+ "rewards/chosen": -0.5108624458312988,
659
+ "rewards/margins": 1.3080674171447755,
660
+ "rewards/rejected": -1.8189298629760742,
661
+ "step": 480
662
+ },
663
+ {
664
+ "epoch": 0.5071807478328374,
665
+ "grad_norm": 29.10971517563742,
666
+ "kl": 0.0,
667
+ "learning_rate": 5e-07,
668
+ "logps/chosen": -390.874140625,
669
+ "logps/rejected": -379.295703125,
670
+ "loss": 0.4363,
671
+ "rewards/chosen": -0.9983409627278645,
672
+ "rewards/margins": 0.9311472754384957,
673
+ "rewards/rejected": -1.9294882381663603,
674
+ "step": 490
675
+ },
676
+ {
677
+ "epoch": 0.51753137533963,
678
+ "grad_norm": 27.404424128068055,
679
+ "kl": 0.0,
680
+ "learning_rate": 5e-07,
681
+ "logps/chosen": -330.4240828804348,
682
+ "logps/rejected": -394.64285714285717,
683
+ "loss": 0.3714,
684
+ "rewards/chosen": -0.724442468173262,
685
+ "rewards/margins": 1.7530021910493512,
686
+ "rewards/rejected": -2.477444659222613,
687
+ "step": 500
688
+ },
689
+ {
690
+ "epoch": 0.5278820028464226,
691
+ "grad_norm": 30.205347992720988,
692
+ "kl": 0.010777664370834827,
693
+ "learning_rate": 5e-07,
694
+ "logps/chosen": -371.4545238597973,
695
+ "logps/rejected": -425.5056776889535,
696
+ "loss": 0.4051,
697
+ "rewards/chosen": -0.7893987088590055,
698
+ "rewards/margins": 1.3341055749573099,
699
+ "rewards/rejected": -2.1235042838163154,
700
+ "step": 510
701
+ },
702
+ {
703
+ "epoch": 0.5382326303532151,
704
+ "grad_norm": 27.47044972378467,
705
+ "kl": 0.0,
706
+ "learning_rate": 5e-07,
707
+ "logps/chosen": -317.4935569324713,
708
+ "logps/rejected": -394.0045751284247,
709
+ "loss": 0.433,
710
+ "rewards/chosen": -0.9540053619735542,
711
+ "rewards/margins": 1.3108872161514458,
712
+ "rewards/rejected": -2.264892578125,
713
+ "step": 520
714
+ },
715
+ {
716
+ "epoch": 0.5485832578600077,
717
+ "grad_norm": 32.30343597091197,
718
+ "kl": 0.06133537366986275,
719
+ "learning_rate": 5e-07,
720
+ "logps/chosen": -377.94091796875,
721
+ "logps/rejected": -412.06171875,
722
+ "loss": 0.415,
723
+ "rewards/chosen": -0.42492337226867677,
724
+ "rewards/margins": 1.4800034999847413,
725
+ "rewards/rejected": -1.904926872253418,
726
+ "step": 530
727
+ },
728
+ {
729
+ "epoch": 0.5589338853668003,
730
+ "grad_norm": 34.615081184959564,
731
+ "kl": 0.0,
732
+ "learning_rate": 5e-07,
733
+ "logps/chosen": -326.7767721036585,
734
+ "logps/rejected": -424.0320012019231,
735
+ "loss": 0.4396,
736
+ "rewards/chosen": -0.4712153178889577,
737
+ "rewards/margins": 0.8699719880505454,
738
+ "rewards/rejected": -1.3411873059395032,
739
+ "step": 540
740
+ },
741
+ {
742
+ "epoch": 0.5692845128735929,
743
+ "grad_norm": 34.56564210195164,
744
+ "kl": 0.06511452049016953,
745
+ "learning_rate": 5e-07,
746
+ "logps/chosen": -407.66327617694805,
747
+ "logps/rejected": -371.1233998493976,
748
+ "loss": 0.375,
749
+ "rewards/chosen": -0.19861872784503096,
750
+ "rewards/margins": 1.8991040725361816,
751
+ "rewards/rejected": -2.0977228003812125,
752
+ "step": 550
753
+ },
754
+ {
755
+ "epoch": 0.5796351403803855,
756
+ "grad_norm": 26.904457572554023,
757
+ "kl": 0.14332695305347443,
758
+ "learning_rate": 5e-07,
759
+ "logps/chosen": -336.1937744140625,
760
+ "logps/rejected": -404.095556640625,
761
+ "loss": 0.4105,
762
+ "rewards/chosen": -0.39203429222106934,
763
+ "rewards/margins": 1.2247087955474854,
764
+ "rewards/rejected": -1.6167430877685547,
765
+ "step": 560
766
+ },
767
+ {
768
+ "epoch": 0.5899857678871782,
769
+ "grad_norm": 29.784323096457744,
770
+ "kl": 0.005803870968520641,
771
+ "learning_rate": 5e-07,
772
+ "logps/chosen": -300.69694346005156,
773
+ "logps/rejected": -407.36216517857144,
774
+ "loss": 0.4499,
775
+ "rewards/chosen": -0.4008376917888209,
776
+ "rewards/margins": 1.1861952923846664,
777
+ "rewards/rejected": -1.5870329841734871,
778
+ "step": 570
779
+ },
780
+ {
781
+ "epoch": 0.6003363953939708,
782
+ "grad_norm": 24.46799204128634,
783
+ "kl": 0.22002115845680237,
784
+ "learning_rate": 5e-07,
785
+ "logps/chosen": -347.5768229166667,
786
+ "logps/rejected": -443.0283717105263,
787
+ "loss": 0.4189,
788
+ "rewards/chosen": -0.3804002716427758,
789
+ "rewards/margins": 1.428496646403071,
790
+ "rewards/rejected": -1.808896918045847,
791
+ "step": 580
792
+ },
793
+ {
794
+ "epoch": 0.6106870229007634,
795
+ "grad_norm": 30.275312642751995,
796
+ "kl": 0.13701924681663513,
797
+ "learning_rate": 5e-07,
798
+ "logps/chosen": -355.59707919034093,
799
+ "logps/rejected": -430.7814670138889,
800
+ "loss": 0.4381,
801
+ "rewards/chosen": -0.5888070193204012,
802
+ "rewards/margins": 1.315426489319464,
803
+ "rewards/rejected": -1.9042335086398654,
804
+ "step": 590
805
+ },
806
+ {
807
+ "epoch": 0.621037650407556,
808
+ "grad_norm": 32.60832471668693,
809
+ "kl": 0.02227201499044895,
810
+ "learning_rate": 5e-07,
811
+ "logps/chosen": -350.7683919270833,
812
+ "logps/rejected": -444.4885896381579,
813
+ "loss": 0.4163,
814
+ "rewards/chosen": -0.5356872195289248,
815
+ "rewards/margins": 1.2915597977793605,
816
+ "rewards/rejected": -1.8272470173082853,
817
+ "step": 600
818
+ },
819
+ {
820
+ "epoch": 0.621037650407556,
821
+ "eval_kl": 0.009310548193752766,
822
+ "eval_logps/chosen": -339.2911238913525,
823
+ "eval_logps/rejected": -389.9666313559322,
824
+ "eval_loss": 0.418056845664978,
825
+ "eval_rewards/chosen": -0.5249900056623302,
826
+ "eval_rewards/margins": 1.2155782523322407,
827
+ "eval_rewards/rejected": -1.740568257994571,
828
+ "eval_runtime": 261.1582,
829
+ "eval_samples_per_second": 7.069,
830
+ "eval_steps_per_second": 3.534,
831
+ "step": 600
832
+ },
833
+ {
834
+ "epoch": 0.6313882779143486,
835
+ "grad_norm": 22.18017793377208,
836
+ "kl": 0.039247892796993256,
837
+ "learning_rate": 5e-07,
838
+ "logps/chosen": -383.04136439732144,
839
+ "logps/rejected": -429.54263466282896,
840
+ "loss": 0.4132,
841
+ "rewards/chosen": -0.4417642865862165,
842
+ "rewards/margins": 1.5167117298097539,
843
+ "rewards/rejected": -1.9584760163959705,
844
+ "step": 610
845
+ },
846
+ {
847
+ "epoch": 0.6417389054211412,
848
+ "grad_norm": 29.924696721027633,
849
+ "kl": 0.03644561767578125,
850
+ "learning_rate": 5e-07,
851
+ "logps/chosen": -382.4176720727848,
852
+ "logps/rejected": -498.68258101851853,
853
+ "loss": 0.3878,
854
+ "rewards/chosen": -0.47994140431850774,
855
+ "rewards/margins": 1.639255923095169,
856
+ "rewards/rejected": -2.119197327413677,
857
+ "step": 620
858
+ },
859
+ {
860
+ "epoch": 0.6520895329279337,
861
+ "grad_norm": 28.116353403382174,
862
+ "kl": 0.0513916015625,
863
+ "learning_rate": 5e-07,
864
+ "logps/chosen": -311.66650390625,
865
+ "logps/rejected": -419.412939453125,
866
+ "loss": 0.3978,
867
+ "rewards/chosen": -0.5402119159698486,
868
+ "rewards/margins": 1.4218003749847412,
869
+ "rewards/rejected": -1.9620122909545898,
870
+ "step": 630
871
+ },
872
+ {
873
+ "epoch": 0.6624401604347263,
874
+ "grad_norm": 30.136959971403833,
875
+ "kl": 0.0,
876
+ "learning_rate": 5e-07,
877
+ "logps/chosen": -367.5465806934931,
878
+ "logps/rejected": -451.5183638649425,
879
+ "loss": 0.4204,
880
+ "rewards/chosen": -1.145416991351402,
881
+ "rewards/margins": 0.9065032307885044,
882
+ "rewards/rejected": -2.0519202221399064,
883
+ "step": 640
884
+ },
885
+ {
886
+ "epoch": 0.6727907879415189,
887
+ "grad_norm": 25.55983506887128,
888
+ "kl": 0.0,
889
+ "learning_rate": 5e-07,
890
+ "logps/chosen": -331.976943597561,
891
+ "logps/rejected": -408.9071514423077,
892
+ "loss": 0.4119,
893
+ "rewards/chosen": -0.9971130185010957,
894
+ "rewards/margins": 1.7702796535241447,
895
+ "rewards/rejected": -2.7673926720252404,
896
+ "step": 650
897
+ },
898
+ {
899
+ "epoch": 0.6831414154483115,
900
+ "grad_norm": 25.82328491415139,
901
+ "kl": 0.008263682946562767,
902
+ "learning_rate": 5e-07,
903
+ "logps/chosen": -361.71830610795456,
904
+ "logps/rejected": -486.77197265625,
905
+ "loss": 0.4233,
906
+ "rewards/chosen": -0.7347448522394354,
907
+ "rewards/margins": 2.0849816678750392,
908
+ "rewards/rejected": -2.8197265201144748,
909
+ "step": 660
910
+ },
911
+ {
912
+ "epoch": 0.6934920429551041,
913
+ "grad_norm": 26.68136550686645,
914
+ "kl": 0.09514617919921875,
915
+ "learning_rate": 5e-07,
916
+ "logps/chosen": -355.8723958333333,
917
+ "logps/rejected": -486.3736672794118,
918
+ "loss": 0.3773,
919
+ "rewards/chosen": -0.6042455546061198,
920
+ "rewards/margins": 2.0892714347091377,
921
+ "rewards/rejected": -2.6935169893152575,
922
+ "step": 670
923
+ },
924
+ {
925
+ "epoch": 0.7038426704618967,
926
+ "grad_norm": 21.168784955584055,
927
+ "kl": 0.0,
928
+ "learning_rate": 5e-07,
929
+ "logps/chosen": -330.87958757267444,
930
+ "logps/rejected": -369.6824588260135,
931
+ "loss": 0.4207,
932
+ "rewards/chosen": -0.6469083830367687,
933
+ "rewards/margins": 1.6826272301521037,
934
+ "rewards/rejected": -2.3295356131888725,
935
+ "step": 680
936
+ },
937
+ {
938
+ "epoch": 0.7141932979686894,
939
+ "grad_norm": 31.900093457461022,
940
+ "kl": 0.08187294006347656,
941
+ "learning_rate": 5e-07,
942
+ "logps/chosen": -417.3225528492647,
943
+ "logps/rejected": -440.6748471467391,
944
+ "loss": 0.4112,
945
+ "rewards/chosen": -0.7309647728415096,
946
+ "rewards/margins": 1.2705956041965338,
947
+ "rewards/rejected": -2.0015603770380435,
948
+ "step": 690
949
+ },
950
+ {
951
+ "epoch": 0.724543925475482,
952
+ "grad_norm": 27.570529431002825,
953
+ "kl": 0.0,
954
+ "learning_rate": 5e-07,
955
+ "logps/chosen": -337.6109751506024,
956
+ "logps/rejected": -445.5989752435065,
957
+ "loss": 0.4132,
958
+ "rewards/chosen": -0.41143114021025506,
959
+ "rewards/margins": 1.2929300198698992,
960
+ "rewards/rejected": -1.7043611600801543,
961
+ "step": 700
962
+ },
963
+ {
964
+ "epoch": 0.7348945529822746,
965
+ "grad_norm": 25.994342612564424,
966
+ "kl": 0.004410457797348499,
967
+ "learning_rate": 5e-07,
968
+ "logps/chosen": -273.53585737179486,
969
+ "logps/rejected": -435.5107660060976,
970
+ "loss": 0.392,
971
+ "rewards/chosen": -0.5441466111403245,
972
+ "rewards/margins": 1.5854568910867144,
973
+ "rewards/rejected": -2.129603502227039,
974
+ "step": 710
975
+ },
976
+ {
977
+ "epoch": 0.7452451804890672,
978
+ "grad_norm": 28.362840310964046,
979
+ "kl": 0.0,
980
+ "learning_rate": 5e-07,
981
+ "logps/chosen": -332.836171875,
982
+ "logps/rejected": -474.0301470588235,
983
+ "loss": 0.3782,
984
+ "rewards/chosen": -0.7581790669759114,
985
+ "rewards/margins": 1.799709726969401,
986
+ "rewards/rejected": -2.5578887939453123,
987
+ "step": 720
988
+ },
989
+ {
990
+ "epoch": 0.7555958079958598,
991
+ "grad_norm": 21.561327202318306,
992
+ "kl": 0.0,
993
+ "learning_rate": 5e-07,
994
+ "logps/chosen": -275.46072571536143,
995
+ "logps/rejected": -412.22519277597405,
996
+ "loss": 0.3875,
997
+ "rewards/chosen": -0.5184578493417028,
998
+ "rewards/margins": 1.9156346847111583,
999
+ "rewards/rejected": -2.434092534052861,
1000
+ "step": 730
1001
+ },
1002
+ {
1003
+ "epoch": 0.7659464355026524,
1004
+ "grad_norm": 18.54675354753111,
1005
+ "kl": 0.0,
1006
+ "learning_rate": 5e-07,
1007
+ "logps/chosen": -368.63963607594934,
1008
+ "logps/rejected": -425.9934895833333,
1009
+ "loss": 0.4036,
1010
+ "rewards/chosen": -0.8795772504202927,
1011
+ "rewards/margins": 1.8751527858089703,
1012
+ "rewards/rejected": -2.754730036229263,
1013
+ "step": 740
1014
+ },
1015
+ {
1016
+ "epoch": 0.7762970630094449,
1017
+ "grad_norm": 27.548342849043514,
1018
+ "kl": 0.0,
1019
+ "learning_rate": 5e-07,
1020
+ "logps/chosen": -395.18419471153845,
1021
+ "logps/rejected": -404.6112804878049,
1022
+ "loss": 0.4149,
1023
+ "rewards/chosen": -0.8401767046023638,
1024
+ "rewards/margins": 1.619423790526733,
1025
+ "rewards/rejected": -2.459600495129097,
1026
+ "step": 750
1027
+ },
1028
+ {
1029
+ "epoch": 0.7866476905162375,
1030
+ "grad_norm": 26.78674064602181,
1031
+ "kl": 0.0,
1032
+ "learning_rate": 5e-07,
1033
+ "logps/chosen": -361.57657251602564,
1034
+ "logps/rejected": -418.945693597561,
1035
+ "loss": 0.4087,
1036
+ "rewards/chosen": -0.6369634775015024,
1037
+ "rewards/margins": 1.3416846384474304,
1038
+ "rewards/rejected": -1.9786481159489329,
1039
+ "step": 760
1040
+ },
1041
+ {
1042
+ "epoch": 0.7969983180230301,
1043
+ "grad_norm": 25.603994333749306,
1044
+ "kl": 0.02446603775024414,
1045
+ "learning_rate": 5e-07,
1046
+ "logps/chosen": -319.98974609375,
1047
+ "logps/rejected": -407.2808314732143,
1048
+ "loss": 0.39,
1049
+ "rewards/chosen": -0.4739310615941098,
1050
+ "rewards/margins": 1.6105410496991381,
1051
+ "rewards/rejected": -2.084472111293248,
1052
+ "step": 770
1053
+ },
1054
+ {
1055
+ "epoch": 0.8073489455298227,
1056
+ "grad_norm": 26.587028848139315,
1057
+ "kl": 0.04417114332318306,
1058
+ "learning_rate": 5e-07,
1059
+ "logps/chosen": -318.715,
1060
+ "logps/rejected": -429.6086856617647,
1061
+ "loss": 0.3786,
1062
+ "rewards/chosen": -0.553302001953125,
1063
+ "rewards/margins": 1.9189411836511947,
1064
+ "rewards/rejected": -2.4722431856043197,
1065
+ "step": 780
1066
+ },
1067
+ {
1068
+ "epoch": 0.8176995730366153,
1069
+ "grad_norm": 23.329347974769387,
1070
+ "kl": 0.10457019507884979,
1071
+ "learning_rate": 5e-07,
1072
+ "logps/chosen": -382.36054180194805,
1073
+ "logps/rejected": -415.20811370481925,
1074
+ "loss": 0.3949,
1075
+ "rewards/chosen": -0.9674345734831574,
1076
+ "rewards/margins": 2.05568157274237,
1077
+ "rewards/rejected": -3.023116146225527,
1078
+ "step": 790
1079
+ },
1080
+ {
1081
+ "epoch": 0.8280502005434079,
1082
+ "grad_norm": 25.709767109519216,
1083
+ "kl": 0.0,
1084
+ "learning_rate": 5e-07,
1085
+ "logps/chosen": -314.2330375339674,
1086
+ "logps/rejected": -512.2797564338235,
1087
+ "loss": 0.4158,
1088
+ "rewards/chosen": -0.7708018759022588,
1089
+ "rewards/margins": 2.3479348399754985,
1090
+ "rewards/rejected": -3.1187367158777572,
1091
+ "step": 800
1092
+ },
1093
+ {
1094
+ "epoch": 0.8280502005434079,
1095
+ "eval_kl": 0.006037264596670866,
1096
+ "eval_logps/chosen": -341.35116407982264,
1097
+ "eval_logps/rejected": -395.00337658898303,
1098
+ "eval_loss": 0.41274696588516235,
1099
+ "eval_rewards/chosen": -0.7309938418098669,
1100
+ "eval_rewards/margins": 1.5132525602193967,
1101
+ "eval_rewards/rejected": -2.2442464020292636,
1102
+ "eval_runtime": 260.9097,
1103
+ "eval_samples_per_second": 7.075,
1104
+ "eval_steps_per_second": 3.538,
1105
+ "step": 800
1106
+ },
1107
+ {
1108
+ "epoch": 0.8384008280502006,
1109
+ "grad_norm": 35.370868792942815,
1110
+ "kl": 0.038701437413692474,
1111
+ "learning_rate": 5e-07,
1112
+ "logps/chosen": -300.92038143382354,
1113
+ "logps/rejected": -437.9970833333333,
1114
+ "loss": 0.4117,
1115
+ "rewards/chosen": -0.6198445039636948,
1116
+ "rewards/margins": 1.6185169055415134,
1117
+ "rewards/rejected": -2.2383614095052082,
1118
+ "step": 810
1119
+ },
1120
+ {
1121
+ "epoch": 0.8487514555569932,
1122
+ "grad_norm": 27.200562796310017,
1123
+ "kl": 0.03499946743249893,
1124
+ "learning_rate": 5e-07,
1125
+ "logps/chosen": -420.0014134457237,
1126
+ "logps/rejected": -474.64820498511904,
1127
+ "loss": 0.3712,
1128
+ "rewards/chosen": -0.21315298582378187,
1129
+ "rewards/margins": 2.169883309749135,
1130
+ "rewards/rejected": -2.3830362955729165,
1131
+ "step": 820
1132
+ },
1133
+ {
1134
+ "epoch": 0.8591020830637858,
1135
+ "grad_norm": 22.838766986028332,
1136
+ "kl": 0.08836288750171661,
1137
+ "learning_rate": 5e-07,
1138
+ "logps/chosen": -410.17025862068965,
1139
+ "logps/rejected": -479.935466609589,
1140
+ "loss": 0.4239,
1141
+ "rewards/chosen": -0.5182619642937321,
1142
+ "rewards/margins": 1.6473036309411782,
1143
+ "rewards/rejected": -2.16556559523491,
1144
+ "step": 830
1145
+ },
1146
+ {
1147
+ "epoch": 0.8694527105705784,
1148
+ "grad_norm": 31.190543721407206,
1149
+ "kl": 0.0,
1150
+ "learning_rate": 5e-07,
1151
+ "logps/chosen": -392.1194540895062,
1152
+ "logps/rejected": -380.65261570411394,
1153
+ "loss": 0.4195,
1154
+ "rewards/chosen": -0.5796352904519917,
1155
+ "rewards/margins": 1.1813462089422924,
1156
+ "rewards/rejected": -1.760981499394284,
1157
+ "step": 840
1158
+ },
1159
+ {
1160
+ "epoch": 0.879803338077371,
1161
+ "grad_norm": 27.922649096371728,
1162
+ "kl": 0.05376587063074112,
1163
+ "learning_rate": 5e-07,
1164
+ "logps/chosen": -342.7175263554217,
1165
+ "logps/rejected": -405.21989143668833,
1166
+ "loss": 0.3893,
1167
+ "rewards/chosen": -0.3039788625326501,
1168
+ "rewards/margins": 1.6599957309890625,
1169
+ "rewards/rejected": -1.9639745935217126,
1170
+ "step": 850
1171
+ },
1172
+ {
1173
+ "epoch": 0.8901539655841635,
1174
+ "grad_norm": 27.062117676313864,
1175
+ "kl": 0.029529189690947533,
1176
+ "learning_rate": 5e-07,
1177
+ "logps/chosen": -339.9334415584416,
1178
+ "logps/rejected": -389.23075112951807,
1179
+ "loss": 0.4056,
1180
+ "rewards/chosen": -0.7721986646776076,
1181
+ "rewards/margins": 1.4165069634991618,
1182
+ "rewards/rejected": -2.1887056281767694,
1183
+ "step": 860
1184
+ },
1185
+ {
1186
+ "epoch": 0.9005045930909561,
1187
+ "grad_norm": 26.777659950643177,
1188
+ "kl": 0.027013396844267845,
1189
+ "learning_rate": 5e-07,
1190
+ "logps/chosen": -359.30659239969134,
1191
+ "logps/rejected": -432.05760482594934,
1192
+ "loss": 0.4124,
1193
+ "rewards/chosen": -1.0398042466905382,
1194
+ "rewards/margins": 1.4762831525628244,
1195
+ "rewards/rejected": -2.5160873992533626,
1196
+ "step": 870
1197
+ },
1198
+ {
1199
+ "epoch": 0.9108552205977487,
1200
+ "grad_norm": 26.582657305921924,
1201
+ "kl": 0.0,
1202
+ "learning_rate": 5e-07,
1203
+ "logps/chosen": -347.1331422483766,
1204
+ "logps/rejected": -441.9758565512048,
1205
+ "loss": 0.3819,
1206
+ "rewards/chosen": -0.478736332484654,
1207
+ "rewards/margins": 2.0877041119101123,
1208
+ "rewards/rejected": -2.5664404443947664,
1209
+ "step": 880
1210
+ },
1211
+ {
1212
+ "epoch": 0.9212058481045413,
1213
+ "grad_norm": 26.122755291889042,
1214
+ "kl": 0.0009471893426962197,
1215
+ "learning_rate": 5e-07,
1216
+ "logps/chosen": -334.71205003955697,
1217
+ "logps/rejected": -477.93663194444446,
1218
+ "loss": 0.3907,
1219
+ "rewards/chosen": -0.6639707058290892,
1220
+ "rewards/margins": 2.118597389813754,
1221
+ "rewards/rejected": -2.7825680956428434,
1222
+ "step": 890
1223
+ },
1224
+ {
1225
+ "epoch": 0.9315564756113339,
1226
+ "grad_norm": 27.631179779669328,
1227
+ "kl": 0.03726501390337944,
1228
+ "learning_rate": 5e-07,
1229
+ "logps/chosen": -354.04930971746575,
1230
+ "logps/rejected": -398.39897629310343,
1231
+ "loss": 0.3698,
1232
+ "rewards/chosen": -0.6091255292500535,
1233
+ "rewards/margins": 1.8384917494519042,
1234
+ "rewards/rejected": -2.4476172787019577,
1235
+ "step": 900
1236
+ },
1237
+ {
1238
+ "epoch": 0.9419071031181265,
1239
+ "grad_norm": 31.5668708111869,
1240
+ "kl": 0.0027565001510083675,
1241
+ "learning_rate": 5e-07,
1242
+ "logps/chosen": -339.08727254746833,
1243
+ "logps/rejected": -447.73466435185185,
1244
+ "loss": 0.3717,
1245
+ "rewards/chosen": -0.4057273864746094,
1246
+ "rewards/margins": 2.1597686108247736,
1247
+ "rewards/rejected": -2.565495997299383,
1248
+ "step": 910
1249
+ },
1250
+ {
1251
+ "epoch": 0.9522577306249191,
1252
+ "grad_norm": 23.52230221185674,
1253
+ "kl": 0.0,
1254
+ "learning_rate": 5e-07,
1255
+ "logps/chosen": -331.8610026041667,
1256
+ "logps/rejected": -437.75386186079544,
1257
+ "loss": 0.3801,
1258
+ "rewards/chosen": -0.9904574288262261,
1259
+ "rewards/margins": 1.9297606053978504,
1260
+ "rewards/rejected": -2.9202180342240767,
1261
+ "step": 920
1262
+ },
1263
+ {
1264
+ "epoch": 0.9626083581317117,
1265
+ "grad_norm": 21.084240235211357,
1266
+ "kl": 0.005317878909409046,
1267
+ "learning_rate": 5e-07,
1268
+ "logps/chosen": -443.4196810787671,
1269
+ "logps/rejected": -434.53286637931035,
1270
+ "loss": 0.371,
1271
+ "rewards/chosen": -0.6944470340258455,
1272
+ "rewards/margins": 2.5923810180125884,
1273
+ "rewards/rejected": -3.286828052038434,
1274
+ "step": 930
1275
+ },
1276
+ {
1277
+ "epoch": 0.9729589856385044,
1278
+ "grad_norm": 20.680591232042584,
1279
+ "kl": 0.0,
1280
+ "learning_rate": 5e-07,
1281
+ "logps/chosen": -323.59056991185895,
1282
+ "logps/rejected": -446.6455792682927,
1283
+ "loss": 0.3827,
1284
+ "rewards/chosen": -0.837760729667468,
1285
+ "rewards/margins": 2.667289185181046,
1286
+ "rewards/rejected": -3.505049914848514,
1287
+ "step": 940
1288
+ },
1289
+ {
1290
+ "epoch": 0.983309613145297,
1291
+ "grad_norm": 27.520713861963205,
1292
+ "kl": 0.0,
1293
+ "learning_rate": 5e-07,
1294
+ "logps/chosen": -321.9452868009868,
1295
+ "logps/rejected": -363.61830357142856,
1296
+ "loss": 0.3648,
1297
+ "rewards/chosen": -0.34084164468865646,
1298
+ "rewards/margins": 1.999740703362869,
1299
+ "rewards/rejected": -2.3405823480515253,
1300
+ "step": 950
1301
+ },
1302
+ {
1303
+ "epoch": 0.9936602406520896,
1304
+ "grad_norm": 27.18091953431505,
1305
+ "kl": 0.0,
1306
+ "learning_rate": 5e-07,
1307
+ "logps/chosen": -348.9962173655063,
1308
+ "logps/rejected": -325.42737268518516,
1309
+ "loss": 0.3878,
1310
+ "rewards/chosen": -0.4670451200461086,
1311
+ "rewards/margins": 1.8737544706415845,
1312
+ "rewards/rejected": -2.340799590687693,
1313
+ "step": 960
1314
+ },
1315
+ {
1316
+ "epoch": 0.9998706171561651,
1317
+ "step": 966,
1318
+ "total_flos": 0.0,
1319
+ "train_loss": 0.420091498218955,
1320
+ "train_runtime": 6442.4359,
1321
+ "train_samples_per_second": 2.399,
1322
+ "train_steps_per_second": 0.15
1323
+ }
1324
+ ],
1325
+ "logging_steps": 10,
1326
+ "max_steps": 966,
1327
+ "num_input_tokens_seen": 0,
1328
+ "num_train_epochs": 1,
1329
+ "save_steps": 500,
1330
+ "stateful_callbacks": {
1331
+ "TrainerControl": {
1332
+ "args": {
1333
+ "should_epoch_stop": false,
1334
+ "should_evaluate": false,
1335
+ "should_log": false,
1336
+ "should_save": false,
1337
+ "should_training_stop": false
1338
+ },
1339
+ "attributes": {}
1340
+ }
1341
+ },
1342
+ "total_flos": 0.0,
1343
+ "train_batch_size": 1,
1344
+ "trial_name": null,
1345
+ "trial_params": null
1346
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cba89c369383099651285d9ab13eedbdac616246d4efcda96755a315addafe1
3
+ size 7288