sedrickkeh commited on
Commit
6c3bb15
1 Parent(s): 7d58b74

Training in progress, epoch 2

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f5ef0db1013e5c871506b0893d2f50a7d980c041b1b18bb37717f091b48e8f4
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c8545f39bbcc55f9aa75e39d1d7958b20dbb24bdbf3028b0864c3e375dfb90
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5470b4a37c5480f79826097ba70062622392fad23af3ebafb1e18dd949e00241
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae82f784f5c0468450974b4cde4ed543d1869c75483595f1cd442a43e6bce77a
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc65789ae4e46180979164a62b40b15cb07130fc4088bb7df4b43588d8726c45
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a420a32413ab15047591e3d948c8b1328b598187141245eb202a40a65105d822
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba540a2544f9f2cd3efdf27d10b6e48bf55d4d2a0cddc22a294be6f0dd9b09c5
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4740efadc77c9639805a76b1d3e173cfda2bf8594bc2dd212026eb0d15314c8
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -87,3 +87,47 @@
87
  {"current_steps": 860, "total_steps": 1314, "loss": 0.603, "learning_rate": 1.6096656715039868e-06, "epoch": 1.9606837606837608, "percentage": 65.45, "elapsed_time": "12:42:01", "remaining_time": "6:42:16"}
88
  {"current_steps": 870, "total_steps": 1314, "loss": 0.6006, "learning_rate": 1.5478974620685922e-06, "epoch": 1.9834757834757835, "percentage": 66.21, "elapsed_time": "12:50:47", "remaining_time": "6:33:22"}
89
  {"current_steps": 877, "total_steps": 1314, "eval_loss": 0.6204226016998291, "epoch": 1.9994301994301993, "percentage": 66.74, "elapsed_time": "13:04:14", "remaining_time": "6:30:46"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  {"current_steps": 860, "total_steps": 1314, "loss": 0.603, "learning_rate": 1.6096656715039868e-06, "epoch": 1.9606837606837608, "percentage": 65.45, "elapsed_time": "12:42:01", "remaining_time": "6:42:16"}
88
  {"current_steps": 870, "total_steps": 1314, "loss": 0.6006, "learning_rate": 1.5478974620685922e-06, "epoch": 1.9834757834757835, "percentage": 66.21, "elapsed_time": "12:50:47", "remaining_time": "6:33:22"}
89
  {"current_steps": 877, "total_steps": 1314, "eval_loss": 0.6204226016998291, "epoch": 1.9994301994301993, "percentage": 66.74, "elapsed_time": "13:04:14", "remaining_time": "6:30:46"}
90
+ {"current_steps": 880, "total_steps": 1314, "loss": 0.6233, "learning_rate": 1.4868018001115166e-06, "epoch": 2.006837606837607, "percentage": 66.97, "elapsed_time": "13:08:02", "remaining_time": "6:28:39"}
91
+ {"current_steps": 890, "total_steps": 1314, "loss": 0.5696, "learning_rate": 1.4264218424690759e-06, "epoch": 2.0296296296296297, "percentage": 67.73, "elapsed_time": "13:16:49", "remaining_time": "6:19:36"}
92
+ {"current_steps": 900, "total_steps": 1314, "loss": 0.5855, "learning_rate": 1.3668002404174047e-06, "epoch": 2.0524216524216525, "percentage": 68.49, "elapsed_time": "13:25:36", "remaining_time": "6:10:34"}
93
+ {"current_steps": 910, "total_steps": 1314, "loss": 0.5834, "learning_rate": 1.307979109544388e-06, "epoch": 2.0752136752136754, "percentage": 69.25, "elapsed_time": "13:34:22", "remaining_time": "6:01:32"}
94
+ {"current_steps": 920, "total_steps": 1314, "loss": 0.5865, "learning_rate": 1.2500000000000007e-06, "epoch": 2.098005698005698, "percentage": 70.02, "elapsed_time": "13:43:09", "remaining_time": "5:52:31"}
95
+ {"current_steps": 930, "total_steps": 1314, "loss": 0.584, "learning_rate": 1.1929038671460486e-06, "epoch": 2.1207977207977207, "percentage": 70.78, "elapsed_time": "13:51:56", "remaining_time": "5:43:30"}
96
+ {"current_steps": 940, "total_steps": 1314, "loss": 0.5787, "learning_rate": 1.136731042626073e-06, "epoch": 2.1435897435897435, "percentage": 71.54, "elapsed_time": "14:00:43", "remaining_time": "5:34:29"}
97
+ {"current_steps": 950, "total_steps": 1314, "loss": 0.5796, "learning_rate": 1.0815212058758218e-06, "epoch": 2.1663817663817664, "percentage": 72.3, "elapsed_time": "14:09:30", "remaining_time": "5:25:29"}
98
+ {"current_steps": 960, "total_steps": 1314, "loss": 0.5845, "learning_rate": 1.0273133560944432e-06, "epoch": 2.1891737891737892, "percentage": 73.06, "elapsed_time": "14:18:17", "remaining_time": "5:16:29"}
99
+ {"current_steps": 970, "total_steps": 1314, "loss": 0.5833, "learning_rate": 9.741457846961721e-07, "epoch": 2.211965811965812, "percentage": 73.82, "elapsed_time": "14:27:05", "remaining_time": "5:07:30"}
100
+ {"current_steps": 980, "total_steps": 1314, "loss": 0.573, "learning_rate": 9.220560482619956e-07, "epoch": 2.234757834757835, "percentage": 74.58, "elapsed_time": "14:35:51", "remaining_time": "4:58:30"}
101
+ {"current_steps": 990, "total_steps": 1314, "loss": 0.5819, "learning_rate": 8.710809420103788e-07, "epoch": 2.2575498575498574, "percentage": 75.34, "elapsed_time": "14:44:37", "remaining_time": "4:49:30"}
102
+ {"current_steps": 1000, "total_steps": 1314, "loss": 0.5821, "learning_rate": 8.21256473805811e-07, "epoch": 2.2803418803418802, "percentage": 76.1, "elapsed_time": "14:53:22", "remaining_time": "4:40:31"}
103
+ {"current_steps": 1010, "total_steps": 1314, "loss": 0.5796, "learning_rate": 7.726178387235234e-07, "epoch": 2.303133903133903, "percentage": 76.86, "elapsed_time": "15:02:09", "remaining_time": "4:31:32"}
104
+ {"current_steps": 1020, "total_steps": 1314, "loss": 0.5825, "learning_rate": 7.251993941883428e-07, "epoch": 2.325925925925926, "percentage": 77.63, "elapsed_time": "15:10:54", "remaining_time": "4:22:33"}
105
+ {"current_steps": 1030, "total_steps": 1314, "loss": 0.584, "learning_rate": 6.790346357052443e-07, "epoch": 2.348717948717949, "percentage": 78.39, "elapsed_time": "15:19:39", "remaining_time": "4:13:34"}
106
+ {"current_steps": 1040, "total_steps": 1314, "loss": 0.5816, "learning_rate": 6.341561731987572e-07, "epoch": 2.3715099715099717, "percentage": 79.15, "elapsed_time": "15:28:24", "remaining_time": "4:04:36"}
107
+ {"current_steps": 1050, "total_steps": 1314, "loss": 0.5808, "learning_rate": 5.905957079779187e-07, "epoch": 2.394301994301994, "percentage": 79.91, "elapsed_time": "15:37:10", "remaining_time": "3:55:37"}
108
+ {"current_steps": 1060, "total_steps": 1314, "loss": 0.589, "learning_rate": 5.483840103430599e-07, "epoch": 2.417094017094017, "percentage": 80.67, "elapsed_time": "15:45:57", "remaining_time": "3:46:40"}
109
+ {"current_steps": 1070, "total_steps": 1314, "loss": 0.5922, "learning_rate": 5.0755089785024e-07, "epoch": 2.43988603988604, "percentage": 81.43, "elapsed_time": "15:54:44", "remaining_time": "3:37:42"}
110
+ {"current_steps": 1080, "total_steps": 1314, "loss": 0.5874, "learning_rate": 4.6812521424868416e-07, "epoch": 2.4626780626780627, "percentage": 82.19, "elapsed_time": "16:03:30", "remaining_time": "3:28:45"}
111
+ {"current_steps": 1090, "total_steps": 1314, "loss": 0.5847, "learning_rate": 4.301348091060906e-07, "epoch": 2.4854700854700855, "percentage": 82.95, "elapsed_time": "16:12:17", "remaining_time": "3:19:48"}
112
+ {"current_steps": 1100, "total_steps": 1314, "loss": 0.5914, "learning_rate": 3.936065181362211e-07, "epoch": 2.5082621082621084, "percentage": 83.71, "elapsed_time": "16:21:04", "remaining_time": "3:10:51"}
113
+ {"current_steps": 1110, "total_steps": 1314, "loss": 0.5844, "learning_rate": 3.585661442426494e-07, "epoch": 2.5310541310541312, "percentage": 84.47, "elapsed_time": "16:29:51", "remaining_time": "3:01:55"}
114
+ {"current_steps": 1120, "total_steps": 1314, "loss": 0.5826, "learning_rate": 3.250384392920741e-07, "epoch": 2.5538461538461537, "percentage": 85.24, "elapsed_time": "16:38:37", "remaining_time": "2:52:58"}
115
+ {"current_steps": 1130, "total_steps": 1314, "loss": 0.5819, "learning_rate": 2.930470866300583e-07, "epoch": 2.5766381766381765, "percentage": 86.0, "elapsed_time": "16:47:24", "remaining_time": "2:44:02"}
116
+ {"current_steps": 1140, "total_steps": 1314, "loss": 0.5816, "learning_rate": 2.626146843515598e-07, "epoch": 2.5994301994301994, "percentage": 86.76, "elapsed_time": "16:56:09", "remaining_time": "2:35:05"}
117
+ {"current_steps": 1150, "total_steps": 1314, "loss": 0.5817, "learning_rate": 2.3376272933805482e-07, "epoch": 2.6222222222222222, "percentage": 87.52, "elapsed_time": "17:04:55", "remaining_time": "2:26:09"}
118
+ {"current_steps": 1160, "total_steps": 1314, "loss": 0.5824, "learning_rate": 2.065116020725433e-07, "epoch": 2.645014245014245, "percentage": 88.28, "elapsed_time": "17:13:42", "remaining_time": "2:17:13"}
119
+ {"current_steps": 1170, "total_steps": 1314, "loss": 0.5858, "learning_rate": 1.80880552243157e-07, "epoch": 2.667806267806268, "percentage": 89.04, "elapsed_time": "17:22:27", "remaining_time": "2:08:18"}
120
+ {"current_steps": 1180, "total_steps": 1314, "loss": 0.5808, "learning_rate": 1.5688768514553587e-07, "epoch": 2.690598290598291, "percentage": 89.8, "elapsed_time": "17:31:14", "remaining_time": "1:59:22"}
121
+ {"current_steps": 1190, "total_steps": 1314, "loss": 0.5817, "learning_rate": 1.345499488935875e-07, "epoch": 2.7133903133903132, "percentage": 90.56, "elapsed_time": "17:39:59", "remaining_time": "1:50:27"}
122
+ {"current_steps": 1200, "total_steps": 1314, "loss": 0.5849, "learning_rate": 1.1388312244765332e-07, "epoch": 2.736182336182336, "percentage": 91.32, "elapsed_time": "17:48:44", "remaining_time": "1:41:31"}
123
+ {"current_steps": 1210, "total_steps": 1314, "loss": 0.5881, "learning_rate": 9.490180446854791e-08, "epoch": 2.758974358974359, "percentage": 92.09, "elapsed_time": "17:57:30", "remaining_time": "1:32:36"}
124
+ {"current_steps": 1220, "total_steps": 1314, "loss": 0.5778, "learning_rate": 7.7619403005334e-08, "epoch": 2.781766381766382, "percentage": 92.85, "elapsed_time": "18:06:15", "remaining_time": "1:23:41"}
125
+ {"current_steps": 1230, "total_steps": 1314, "loss": 0.5763, "learning_rate": 6.204812602412901e-08, "epoch": 2.8045584045584047, "percentage": 93.61, "elapsed_time": "18:15:02", "remaining_time": "1:14:46"}
126
+ {"current_steps": 1240, "total_steps": 1314, "loss": 0.5913, "learning_rate": 4.8198972784625205e-08, "epoch": 2.827350427350427, "percentage": 94.37, "elapsed_time": "18:23:47", "remaining_time": "1:05:52"}
127
+ {"current_steps": 1250, "total_steps": 1314, "loss": 0.5882, "learning_rate": 3.608172607041632e-08, "epoch": 2.8501424501424504, "percentage": 95.13, "elapsed_time": "18:32:33", "remaining_time": "0:56:57"}
128
+ {"current_steps": 1260, "total_steps": 1314, "loss": 0.5852, "learning_rate": 2.5704945278623438e-08, "epoch": 2.872934472934473, "percentage": 95.89, "elapsed_time": "18:41:19", "remaining_time": "0:48:03"}
129
+ {"current_steps": 1270, "total_steps": 1314, "loss": 0.5861, "learning_rate": 1.7075960373695022e-08, "epoch": 2.8957264957264957, "percentage": 96.65, "elapsed_time": "18:50:05", "remaining_time": "0:39:09"}
130
+ {"current_steps": 1280, "total_steps": 1314, "loss": 0.5763, "learning_rate": 1.0200866709657863e-08, "epoch": 2.9185185185185185, "percentage": 97.41, "elapsed_time": "18:58:53", "remaining_time": "0:30:15"}
131
+ {"current_steps": 1290, "total_steps": 1314, "loss": 0.5762, "learning_rate": 5.08452072447152e-09, "epoch": 2.9413105413105414, "percentage": 98.17, "elapsed_time": "19:07:39", "remaining_time": "0:21:21"}
132
+ {"current_steps": 1300, "total_steps": 1314, "loss": 0.5775, "learning_rate": 1.7305365095324211e-09, "epoch": 2.9641025641025642, "percentage": 98.93, "elapsed_time": "19:16:27", "remaining_time": "0:12:27"}
133
+ {"current_steps": 1310, "total_steps": 1314, "loss": 0.5877, "learning_rate": 1.412832567460054e-10, "epoch": 2.9868945868945866, "percentage": 99.7, "elapsed_time": "19:25:15", "remaining_time": "0:03:33"}