Training in progress, step 10

Browse files

Files changed (4) hide show

adapter_config.json +5 -5
adapter_model.safetensors +1 -1
trainer_state.json +57 -57
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
     "v_proj",
-    "q_proj",
     "down_proj",
-    "o_proj",
-    "up_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "up_proj",
+    "gate_proj",
+    "o_proj",
     "v_proj",
     "down_proj",
+    "q_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4d210f635eac44e942700b41347f4eb64ae38698cc6c694f1f45bcf575e5762
 size 323014560

 version https://git-lfs.github.com/spec/v1
+oid sha256:edf7264f1b7d1fe326fc6176263327445742f9a35ffd19d0c04df5344f7f1fab
 size 323014560

trainer_state.json CHANGED Viewed

@@ -10,201 +10,201 @@
   "log_history": [
     {
       "epoch": 0.03686635944700461,
-      "grad_norm": 0.28515195710660185,
       "learning_rate": 0.0,
       "loss": 1.4734,
       "step": 1
     },
     {
       "epoch": 0.07373271889400922,
-      "grad_norm": 0.26537944818973397,
       "learning_rate": 0.0003,
-      "loss": 1.2799,
       "step": 2
     },
     {
       "epoch": 0.11059907834101383,
-      "grad_norm": 0.32456262433064903,
       "learning_rate": 0.0003,
-      "loss": 1.4609,
       "step": 3
     },
     {
       "epoch": 0.14746543778801843,
-      "grad_norm": 0.3941851070383984,
       "learning_rate": 0.0003,
-      "loss": 1.1246,
       "step": 4
     },
     {
       "epoch": 0.18433179723502305,
-      "grad_norm": 0.14354429714527142,
       "learning_rate": 0.0003,
-      "loss": 1.0118,
       "step": 5
     },
     {
       "epoch": 0.22119815668202766,
-      "grad_norm": 0.15244360645214902,
       "learning_rate": 0.0003,
-      "loss": 1.0724,
       "step": 6
     },
     {
       "epoch": 0.25806451612903225,
-      "grad_norm": 0.17439504736452185,
       "learning_rate": 0.0003,
-      "loss": 0.8952,
       "step": 7
     },
     {
       "epoch": 0.29493087557603687,
-      "grad_norm": 1.8871020983461488,
       "learning_rate": 0.0003,
-      "loss": 0.9702,
       "step": 8
     },
     {
       "epoch": 0.3317972350230415,
-      "grad_norm": 0.34464704757071946,
       "learning_rate": 0.0003,
-      "loss": 0.9233,
       "step": 9
     },
     {
       "epoch": 0.3686635944700461,
-      "grad_norm": 0.20938281732289094,
       "learning_rate": 0.0003,
-      "loss": 0.8031,
       "step": 10
     },
     {
       "epoch": 0.4055299539170507,
-      "grad_norm": 0.20691041179399886,
       "learning_rate": 0.0003,
-      "loss": 0.8318,
       "step": 11
     },
     {
       "epoch": 0.4423963133640553,
-      "grad_norm": 0.16746745456565448,
       "learning_rate": 0.0003,
-      "loss": 0.7613,
       "step": 12
     },
     {
       "epoch": 0.4792626728110599,
-      "grad_norm": 0.16864034661331284,
       "learning_rate": 0.0003,
-      "loss": 0.7453,
       "step": 13
     },
     {
       "epoch": 0.5161290322580645,
-      "grad_norm": 0.09470883605671848,
       "learning_rate": 0.0003,
-      "loss": 0.7272,
       "step": 14
     },
     {
       "epoch": 0.5529953917050692,
-      "grad_norm": 0.10835665836968225,
       "learning_rate": 0.0003,
-      "loss": 0.7714,
       "step": 15
     },
     {
       "epoch": 0.5898617511520737,
-      "grad_norm": 0.13018771502313373,
       "learning_rate": 0.0003,
-      "loss": 0.755,
       "step": 16
     },
     {
       "epoch": 0.6267281105990783,
-      "grad_norm": 0.12000422783438623,
       "learning_rate": 0.0003,
-      "loss": 0.7601,
       "step": 17
     },
     {
       "epoch": 0.663594470046083,
-      "grad_norm": 0.10710145117472845,
       "learning_rate": 0.0003,
-      "loss": 0.7243,
       "step": 18
     },
     {
       "epoch": 0.7004608294930875,
-      "grad_norm": 0.10199222794197009,
       "learning_rate": 0.0003,
-      "loss": 0.7617,
       "step": 19
     },
     {
       "epoch": 0.7373271889400922,
-      "grad_norm": 0.10841261085434695,
       "learning_rate": 0.0003,
-      "loss": 0.7061,
       "step": 20
     },
     {
       "epoch": 0.7741935483870968,
-      "grad_norm": 0.10854446791930876,
       "learning_rate": 0.0003,
-      "loss": 0.6664,
       "step": 21
     },
     {
       "epoch": 0.8110599078341014,
-      "grad_norm": 0.12871352769383354,
       "learning_rate": 0.0003,
-      "loss": 0.7521,
       "step": 22
     },
     {
       "epoch": 0.847926267281106,
-      "grad_norm": 0.09262524917386825,
       "learning_rate": 0.0003,
-      "loss": 0.7143,
       "step": 23
     },
     {
       "epoch": 0.8847926267281107,
-      "grad_norm": 0.1007189769028823,
       "learning_rate": 0.0003,
-      "loss": 0.7151,
       "step": 24
     },
     {
       "epoch": 0.9216589861751152,
-      "grad_norm": 0.11039499658777108,
       "learning_rate": 0.0003,
-      "loss": 0.668,
       "step": 25
     },
     {
       "epoch": 0.9585253456221198,
-      "grad_norm": 0.09552065085357173,
       "learning_rate": 0.0003,
-      "loss": 0.6846,
       "step": 26
     },
     {
       "epoch": 0.9953917050691244,
-      "grad_norm": 0.09742131147780461,
       "learning_rate": 0.0003,
-      "loss": 0.6828,
       "step": 27
     },
     {
       "epoch": 0.9953917050691244,
       "step": 27,
       "total_flos": 5728527974400.0,
-      "train_loss": 0.8682256230601558,
-      "train_runtime": 1037.3465,
-      "train_samples_per_second": 0.837,
-      "train_steps_per_second": 0.026
     }
   ],
   "logging_steps": 1.0,

   "log_history": [
     {
       "epoch": 0.03686635944700461,
+      "grad_norm": 0.2873728095047169,
       "learning_rate": 0.0,
       "loss": 1.4734,
       "step": 1
     },
     {
       "epoch": 0.07373271889400922,
+      "grad_norm": 0.27419596643661837,
       "learning_rate": 0.0003,
+      "loss": 1.279,
       "step": 2
     },
     {
       "epoch": 0.11059907834101383,
+      "grad_norm": 0.3351799319426796,
       "learning_rate": 0.0003,
+      "loss": 1.4582,
       "step": 3
     },
     {
       "epoch": 0.14746543778801843,
+      "grad_norm": 0.2789893810816952,
       "learning_rate": 0.0003,
+      "loss": 1.1216,
       "step": 4
     },
     {
       "epoch": 0.18433179723502305,
+      "grad_norm": 0.14190144952821382,
       "learning_rate": 0.0003,
+      "loss": 1.0121,
       "step": 5
     },
     {
       "epoch": 0.22119815668202766,
+      "grad_norm": 0.1542021738719797,
       "learning_rate": 0.0003,
+      "loss": 1.0723,
       "step": 6
     },
     {
       "epoch": 0.25806451612903225,
+      "grad_norm": 0.17476255091645262,
       "learning_rate": 0.0003,
+      "loss": 0.8962,
       "step": 7
     },
     {
       "epoch": 0.29493087557603687,
+      "grad_norm": 0.30183805297227384,
       "learning_rate": 0.0003,
+      "loss": 0.9577,
       "step": 8
     },
     {
       "epoch": 0.3317972350230415,
+      "grad_norm": 0.2945209326852545,
       "learning_rate": 0.0003,
+      "loss": 0.9143,
       "step": 9
     },
     {
       "epoch": 0.3686635944700461,
+      "grad_norm": 0.21480966699076806,
       "learning_rate": 0.0003,
+      "loss": 0.7952,
       "step": 10
     },
     {
       "epoch": 0.4055299539170507,
+      "grad_norm": 0.18078986894945484,
       "learning_rate": 0.0003,
+      "loss": 0.8234,
       "step": 11
     },
     {
       "epoch": 0.4423963133640553,
+      "grad_norm": 0.15453708977718567,
       "learning_rate": 0.0003,
+      "loss": 0.7589,
       "step": 12
     },
     {
       "epoch": 0.4792626728110599,
+      "grad_norm": 0.1631172234239537,
       "learning_rate": 0.0003,
+      "loss": 0.7419,
       "step": 13
     },
     {
       "epoch": 0.5161290322580645,
+      "grad_norm": 0.09781085387100458,
       "learning_rate": 0.0003,
+      "loss": 0.7239,
       "step": 14
     },
     {
       "epoch": 0.5529953917050692,
+      "grad_norm": 0.09897379010199117,
       "learning_rate": 0.0003,
+      "loss": 0.7673,
       "step": 15
     },
     {
       "epoch": 0.5898617511520737,
+      "grad_norm": 0.11558640849854486,
       "learning_rate": 0.0003,
+      "loss": 0.7533,
       "step": 16
     },
     {
       "epoch": 0.6267281105990783,
+      "grad_norm": 0.11345769354838794,
       "learning_rate": 0.0003,
+      "loss": 0.7581,
       "step": 17
     },
     {
       "epoch": 0.663594470046083,
+      "grad_norm": 0.1013501193678853,
       "learning_rate": 0.0003,
+      "loss": 0.7224,
       "step": 18
     },
     {
       "epoch": 0.7004608294930875,
+      "grad_norm": 0.09930580785134363,
       "learning_rate": 0.0003,
+      "loss": 0.7597,
       "step": 19
     },
     {
       "epoch": 0.7373271889400922,
+      "grad_norm": 0.10206714996240562,
       "learning_rate": 0.0003,
+      "loss": 0.704,
       "step": 20
     },
     {
       "epoch": 0.7741935483870968,
+      "grad_norm": 0.10775281367207125,
       "learning_rate": 0.0003,
+      "loss": 0.6639,
       "step": 21
     },
     {
       "epoch": 0.8110599078341014,
+      "grad_norm": 0.12015377273414085,
       "learning_rate": 0.0003,
+      "loss": 0.7494,
       "step": 22
     },
     {
       "epoch": 0.847926267281106,
+      "grad_norm": 0.08770642908913276,
       "learning_rate": 0.0003,
+      "loss": 0.7115,
       "step": 23
     },
     {
       "epoch": 0.8847926267281107,
+      "grad_norm": 0.135245894998221,
       "learning_rate": 0.0003,
+      "loss": 0.7169,
       "step": 24
     },
     {
       "epoch": 0.9216589861751152,
+      "grad_norm": 0.0993611544536447,
       "learning_rate": 0.0003,
+      "loss": 0.6667,
       "step": 25
     },
     {
       "epoch": 0.9585253456221198,
+      "grad_norm": 0.09795283307056235,
       "learning_rate": 0.0003,
+      "loss": 0.6859,
       "step": 26
     },
     {
       "epoch": 0.9953917050691244,
+      "grad_norm": 0.10408097730031732,
       "learning_rate": 0.0003,
+      "loss": 0.6844,
       "step": 27
     },
     {
       "epoch": 0.9953917050691244,
       "step": 27,
       "total_flos": 5728527974400.0,
+      "train_loss": 0.8656142420238919,
+      "train_runtime": 448.8389,
+      "train_samples_per_second": 1.934,
+      "train_steps_per_second": 0.06
     }
   ],
   "logging_steps": 1.0,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44b57c4122ef14a59ab2c829b81e181b998d7469d4fea10759be8a6da34dc0f4
 size 7480

 version https://git-lfs.github.com/spec/v1
+oid sha256:28a9ab02716a3e49f85fb4031db8b670ed5bcba6d2cefceaf7c7bea7ce10720b
 size 7480