ApfelSchorle committed
Commit
9e92d30
1 Parent(s): 844e982

upload All

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+3BSanokaKai2/table.txt filter=lfs diff=lfs merge=lfs -text
3BSanokaKai2/AI-Large.py ADDED
@@ -0,0 +1,708 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar 21 10:34:46 2024
+
+@author: takan
+"""
+
+import MeCab
+import torch
+import copy
+import time
+import matplotlib.pyplot as plt
+import re
+import math
+import numpy as np
+from gensim.models import Word2Vec
+import pickle
+import threading
+import sentencepiece as spm
+
+class DenseBlock(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.I = torch.nn.Linear(dim, dim*mul)
+        self.O = torch.nn.Linear(dim*mul, dim)
+    def forward(self, x):
+        x = self.I(x)
+        x = torch.nn.functional.elu(x)
+        x = self.O(x)
+        return x
+
+class AttentionBlock(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.Q = torch.nn.Linear(dim, dim*mul)
+        self.K = torch.nn.Linear(dim, dim*mul)
+        self.V = torch.nn.Linear(dim, dim*mul)
+        self.O = torch.nn.Linear(dim*mul, dim)
+    def forward(self, q, k, v):
+        # Element-wise "attention": softmax over the feature axis of q*k, no QK^T matrix.
+        q = self.Q(q)
+        k = self.K(k)
+        v = self.V(v)
+        x = torch.nn.functional.softmax(q * k, dim=-1) * v
+        x = self.O(x)
+        return x
+"""
+class AttentionBlock(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.attn = torch.nn.MultiheadAttention(dim, 16, batch_first=True)
+    def forward(self, q, k, v):
+        x = self.attn(q, k, v)[0]
+        return x
+"""
+class SanokaLayer(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.x = None  # recurrent state, accumulated across forward() calls
+        self.A = AttentionBlock(dim, mul)
+        self.B = DenseBlock(dim, mul)
+    def reset(self, x=None):
+        self.x = x
+    def forward(self, u):
+        if (self.x is not None):
+            uu = torch.nn.functional.normalize(u)
+            xx = torch.nn.functional.normalize(self.x)
+            x = self.A(uu, xx, xx)
+            y = self.B(torch.nn.functional.normalize(x)) + u
+            self.x = x + self.x
+            return y
+        else:
+            uu = torch.nn.functional.normalize(u)
+            x = self.A(uu, uu, uu)
+            y = self.B(torch.nn.functional.normalize(x)) + u
+            self.x = x
+            return y
+
+class SanokaModel(torch.nn.Module):
+    def __init__(self, dim, mul=1, Top=True):
+        super().__init__()
+        self.Top = Top
+        if (Top):
+            self.I = torch.nn.Linear(128, dim)  # projects 128-dim word2vec inputs
+        self.A = SanokaLayer(dim, mul)
+        self.B = SanokaLayer(dim, mul)
+        self.C = SanokaLayer(dim, mul)
+        self.D = SanokaLayer(dim, mul)
+        self.E = SanokaLayer(dim, mul)
+        self.F = SanokaLayer(dim, mul)
+    def reset(self):
+        self.A.reset()
+        self.B.reset()
+        self.C.reset()
+        self.D.reset()
+        self.E.reset()
+        self.F.reset()
+    def forward(self, x):
+        if (self.Top):
+            x = self.I(x)
+        x = self.A(x)
+        x = self.B(x)
+        x = self.C(x)
+        x = self.D(x)
+        x = self.E(x)
+        x = self.F(x)
+        return x
+
+class OutputLayer(torch.nn.Module):
+    def __init__(self, hiddendim, worddim=59000, heads=4):
+        super().__init__()
+        self.H = torch.nn.Linear(hiddendim, worddim)
+    def forward(self, inpute):
+        x = inpute
+        x = self.H(x)
+        return x
+
+def GOILOAD():
+    # Load the word2vec text table; line 0 is the "<count> <dim>" header.
+    fuf = open("table.txt", "r", encoding="UTF-8")
+    goi = fuf.read().split("\n")
+    fuf.close()
+    chardim = len(goi[1:])
+    charid = {goi[i+1].split()[0]: i for i in range(chardim-1)}
+    return charid, [goi[ia+1].split()[0] for ia in range(chardim-1)]
+
+datas = []
+trues = []
+lens = []
+dones = 0
+def Convert(buns, table, maxlen=256):
+    buns = buns.split("\n")
+    sp = spm.SentencePieceProcessor()
+    sp.Load("tokenizer.model")
+    w2v = Word2Vec.load("word2vec.model")
+    data = []
+    true = []
+    lena = []
+    for datac in range(len(buns)):
+        #print(datac)
+        #print(buns[datac])
+        try:
+            buna = sp.EncodeAsPieces(buns[datac])[:maxlen]
+            a = torch.from_numpy(w2v.wv[buna])
+            b = torch.tensor([table[buna[ii]] for ii in range(len(buna))])
+            c = len(buna)
+        except KeyError:
+            print("ERROR")
+        else:
+            data.append(a)
+            true.append(b)
+            lena.append(c)
+            print(datac)
+    f = open("Train_Data.bin", "wb")
+    pickle.dump((data, true, lena), f)
+    f.close()
+    return
+
+def SPMake():
+    spm.SentencePieceTrainer.Train("--input=train_data.txt --model_prefix=tokenizer --vocab_size=20000 --train_extremely_large_corpus=True")
+
+def W2VMake(filepath="train_data.txt", mincount=50, worker=60):
+    sp = spm.SentencePieceProcessor()
+    sp.Load("tokenizer.model")
+    f = open(filepath, mode="r", encoding="UTF-8")
+    texts = f.read().split("\n")
+    f.close()
+    dat = []
+    print(len(texts))
+    for a in range(len(texts)):
+        dat.append(sp.EncodeAsPieces(texts[a]))
+        print(a)
+    model = Word2Vec(sentences=dat, vector_size=128, window=100, min_count=mincount, workers=worker)
+    model.save("word2vec.model")
+    model.wv.save_word2vec_format('table.txt')
+
+def DataMake(filepath="train_data.txt", maxlen=129):
+    table, i2w = GOILOAD()
+    print(len(table))
+    time.sleep(1)
+    f = open(filepath, mode="r", encoding="UTF-8")
+    txt = f.read()
+    f.close()
+    Convert(txt, table)
+    return None
+
+def PreTrain(Load=False, dim=512, outputdim=40000, lr=1e-04, epoch=10, epochload=1000, usedata=480000, onestep=100, uselen=64):
+    global datas
+    global trues
+    global lens
+    torch.manual_seed(1293431)
+    #torch.manual_seed(576765)
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    # One six-layer stack per GPU, run as a pipeline; only the first has the 128->dim input layer.
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    if (Load):
+        model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+        model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+        model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+        model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+        model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+        model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+        output.load_state_dict(torch.load("output.pth", map_location=device7))
+    model1Optim = torch.optim.Adam(model1.parameters(), lr=lr)
+    model2Optim = torch.optim.Adam(model2.parameters(), lr=lr)
+    model3Optim = torch.optim.Adam(model3.parameters(), lr=lr)
+    model4Optim = torch.optim.Adam(model4.parameters(), lr=lr)
+    model5Optim = torch.optim.Adam(model5.parameters(), lr=lr)
+    model6Optim = torch.optim.Adam(model6.parameters(), lr=lr)
+    outputO = torch.optim.Adam(output.parameters(), lr=lr)
+    f = open("Train_Data.bin", "rb")
+    datas, trues, lens = pickle.load(f)
+    f.close()
+    train_x = torch.zeros((epochload, uselen, 128)).to(torch.bfloat16).to(device1)
+    train_y = torch.full((epochload, uselen), outputdim - 1, dtype=torch.long).to(device7)
+    table, i2w = GOILOAD()
+    base = 0
+    epoch = int(np.floor((len(datas) / epochload) * epoch))
+    print("data count", len(datas))
+    for epochs in range(epoch):
+        train_x = train_x.detach()
+        train_y = train_y.detach()
+        if (base < len(datas) - epochload*2):
+            base += epochload
+        else:
+            base = 0
+        if (base > usedata):
+            base = 0
+        for b in range(epochload):
+            a = b + base
+            leng = lens[a]
+            if (leng > uselen):
+                leng = uselen
+            # Clamp both sides to leng so sequences longer than uselen don't overflow the buffers.
+            train_x[b, :leng] = datas[a].to(torch.bfloat16).to(device1)[:uselen]
+            train_y[b, :leng] = trues[a].to(device7).to(torch.long)[:uselen]
+        epls = 0.00
+        timem = time.time()
+        for steps in range(epochload//onestep):
+            model1.reset()
+            model2.reset()
+            model3.reset()
+            model4.reset()
+            model5.reset()
+            model6.reset()
+            oa = ""
+            model1Optim.zero_grad()
+            model2Optim.zero_grad()
+            model3Optim.zero_grad()
+            model4Optim.zero_grad()
+            model5Optim.zero_grad()
+            model6Optim.zero_grad()
+            outputO.zero_grad()
+            loss = 0.00
+            for b in range(uselen-1):
+                out = model1(train_x[steps*onestep:steps*onestep+onestep, b])
+                out = model2(out.to(device2))
+                out = model3(out.to(device3))
+                out = model4(out.to(device4))
+                out = model5(out.to(device5))
+                out = model6(out.to(device6))
+                out = output(out.to(device7))
+                loss += lossf(out, train_y[steps*onestep:steps*onestep+onestep, b+1])
+                epls += loss.item()  # .item() keeps the logging sum off the autograd graph
+                sfo = torch.nn.functional.softmax(out[0], dim=-1)
+                wid = torch.argmax(sfo, dim=-1).item()
+                try:
+                    wd = i2w[wid]
+                except IndexError:
+                    oa = oa + "ERROR"
+                else:
+                    oa = oa + wd
+            loss.backward()
+            #print(b)
+            model1Optim.step()
+            model2Optim.step()
+            model3Optim.step()
+            model4Optim.step()
+            model5Optim.step()
+            model6Optim.step()
+            outputO.step()
+            print("sample output> ", oa[:32].replace("?", ""))
+        print("epoch", epochs, "Train_epoch_sum_loss", epls, "time", time.time() - timem)
+        if (epochs % 10 == 9):
+            torch.save(model1.state_dict(), "LLM1.pth")
+            torch.save(model2.state_dict(), "LLM2.pth")
+            torch.save(model3.state_dict(), "LLM3.pth")
+            torch.save(model4.state_dict(), "LLM4.pth")
+            torch.save(model5.state_dict(), "LLM5.pth")
+            torch.save(model6.state_dict(), "LLM6.pth")
+            torch.save(output.state_dict(), "output.pth")
+
+def Fineturning(Load=False, dim=512, outputdim=40000, lr=1e-04, epoch=10000, epochload=1000, onestep=200, uselen=32):
+    global datas
+    global trues
+    global lens
+    torch.manual_seed(1293431)
+    #torch.manual_seed(576765)
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+    output.load_state_dict(torch.load("output.pth", map_location=device7))
+    model1Optim = torch.optim.Adam(model1.parameters(), lr=lr)
+    model2Optim = torch.optim.Adam(model2.parameters(), lr=lr)
+    model3Optim = torch.optim.Adam(model3.parameters(), lr=lr)
+    model4Optim = torch.optim.Adam(model4.parameters(), lr=lr)
+    model5Optim = torch.optim.Adam(model5.parameters(), lr=lr)
+    model6Optim = torch.optim.Adam(model6.parameters(), lr=lr/500)
+    outputO = torch.optim.Adam(output.parameters(), lr=lr)
+    f = open("Train_Data.bin", "rb")
+    datas, trues, lens = pickle.load(f)
+    f.close()
+    train_x = torch.zeros((epochload, uselen, 128)).to(torch.bfloat16).to(device1)
+    train_y = torch.full((epochload, uselen), outputdim - 1, dtype=torch.long).to(device7)
+    table, i2w = GOILOAD()
+    base = 0
+    epoch = int(np.floor((len(datas) / epochload) * epoch))
+    #print(epoch)
+    for epochs in range(epoch):
+        train_x = train_x.detach()
+        train_y = train_y.detach()
+        if (base < len(datas) - epochload*2):
+            base += epochload
+        else:
+            base = 0
+        for b in range(epochload):
+            a = b + base
+            #print(a)
+            leng = lens[a]
+            if (leng > uselen):
+                leng = uselen
+            train_x[b, :leng] = datas[a].to(torch.bfloat16).to(device1)[:uselen]
+            train_y[b, :leng] = trues[a].to(device7).to(torch.long)[:uselen]
+        epls = 0.00
+        timem = time.time()
+        for steps in range(epochload//onestep):
+            model1.reset()
+            model2.reset()
+            model3.reset()
+            model4.reset()
+            model5.reset()
+            model6.reset()
+            oa = ""
+            loss = 0.00
+            model1Optim.zero_grad()
+            model2Optim.zero_grad()
+            model3Optim.zero_grad()
+            model4Optim.zero_grad()
+            model5Optim.zero_grad()
+            model6Optim.zero_grad()
+            outputO.zero_grad()
+            for b in range(uselen-1):
+                # The six recurrent stacks stay frozen; only the output layer is trained.
+                with torch.no_grad():
+                    out = model1(train_x[steps*onestep:steps*onestep+onestep, b])
+                    out = model2(out.to(device2))
+                    out = model3(out.to(device3))
+                    out = model4(out.to(device4))
+                    out = model5(out.to(device5))
+                    out = model6(out.to(device6))
+                out = output(out.to(device7))
+                loss += lossf(out, train_y[steps*onestep:steps*onestep+onestep, b+1])
+                epls += loss.item()
+                sfo = torch.nn.functional.softmax(out[0], dim=-1)
+                wid = torch.argmax(sfo, dim=-1).item()
+                try:
+                    wd = i2w[wid]
+                except IndexError:
+                    oa = oa + "ERROR"
+                else:
+                    oa = oa + wd
+            loss.backward()
+            #model6Optim.step()
+            outputO.step()
+            print("sample output> ", oa[:32].replace("?", ""))
+        print("epoch", epochs, "Train_epoch_sum_loss", epls, "time", time.time() - timem)
+        if (epochs % 10 == 9):
+            #torch.save(model6.state_dict(), "LLM6F.pth")
+            torch.save(output.state_dict(), "fineturning.pth")
+
+def Predict(dim=512, outputdim=40000, maxlen=32):
+    torch.manual_seed(1293431)
+    table, i2w = GOILOAD()
+    sp = spm.SentencePieceProcessor()
+    sp.Load("tokenizer.model")
+    w2v = Word2Vec.load("word2vec.model")
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+    output.load_state_dict(torch.load("fineturning.pth", map_location=device7))
+    while True:
+        dd = input("Q> ")# + ","
+        data = []
+        buna = sp.EncodeAsPieces(dd)
+        print(buna)
+        for a in range(len(buna)):
+            try:
+                data.append(torch.from_numpy(w2v.wv[buna[a]]).view(1, 1, 128).to(device1))
+            except KeyError:
+                print("Not Found")
+        dat = torch.cat(data, dim=1).to(device1)
+        oa = ""
+        with torch.no_grad():
+            model1.reset()
+            model2.reset()
+            model3.reset()
+            model4.reset()
+            model5.reset()
+            model6.reset()
+            oa = ""
+            # Feed the prompt to build up the recurrent state...
+            for a in range(dat.shape[1] - 1):
+                out = model1(dat[:, a].to(torch.bfloat16))
+                out = model2(out.to(device2))
+                out = model3(out.to(device3))
+                out = model4(out.to(device4))
+                out = model5(out.to(device5))
+                out = model6(out.to(device6))
+                out = output(out.to(device7))
+            # ...then generate greedily, token by token.
+            for b in range(maxlen - dat.shape[1]):
+                out = model1(dat[:, -1].to(torch.bfloat16))
+                out = model2(out.to(device2))
+                out = model3(out.to(device3))
+                out = model4(out.to(device4))
+                out = model5(out.to(device5))
+                out = model6(out.to(device6))
+                out = output(out.to(device7))
+                sfo = torch.nn.functional.softmax(out, dim=-1)
+                wid = torch.argmax(sfo, dim=-1).item()
+                if (wid != outputdim - 1):
+                    try:
+                        wd = i2w[wid]
+                    except IndexError:
+                        oa = oa + "ERROR"
+                    else:
+                        oa = oa + wd
+                        dat = torch.cat([dat, torch.from_numpy(w2v.wv[wd]).to(device1).view(1, 1, 128)], dim=1)
+        print("A> ", oa.replace("?", ""))
+
+def ValidationLoss(dim=512, outputdim=40000, maxlen=32):
+    torch.manual_seed(1293431)
+    table, i2w = GOILOAD()
+    tagger = MeCab.Tagger("-Owakati")
+    w2v = Word2Vec.load("word2vec.model")
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+    output.load_state_dict(torch.load("output.pth", map_location=device7))
+    dd = input("TestData> ")
+    data = []
+    buna = tagger.parse(dd).split()
+    trued = torch.tensor([table[dfg] for dfg in buna]).to(torch.long).unsqueeze(dim=0)
+    print(buna)
+    print(trued)
+    for a in range(len(buna)):
+        try:
+            data.append(torch.from_numpy(w2v.wv[buna[a]]).view(1, 1, 128).to(device1))
+        except KeyError:
+            print("Not Found")
+    dat = torch.cat(data, dim=1).to(device1)
+    oa = ""
+    loss = 0.00
+    with torch.no_grad():
+        model1.reset()
+        model2.reset()
+        model3.reset()
+        model4.reset()
+        model5.reset()
+        model6.reset()
+        oa = ""
+        for a in range(dat.shape[1] - 1):
+            out = model1(dat[:, a].to(torch.bfloat16))
+            out = model2(out.to(device2))
+            out = model3(out.to(device3))
+            out = model4(out.to(device4))
+            out = model5(out.to(device5))
+            out = model6(out.to(device6))
+            out = output(out.to(device7))
+            sfo = torch.nn.functional.softmax(out, dim=-1)
+            wid = torch.argmax(sfo, dim=-1).item()
+            try:
+                wd = i2w[wid]
+            except IndexError:
+                oa = oa + "ERROR"
+            else:
+                oa = oa + wd
+            loss += lossf(out, trued[:, a+1].to(device7))  # target on the same device as the logits
+    print("validationloss", loss.item() / dat.shape[1], "preview", oa)
+
+if __name__ == "__main__":
+    pass  # uncomment the step you need:
+    #DataMake()
+    #Fineturning(Load=False, dim=2048, outputdim=21000, lr=1e-03, onestep=300, uselen=128)
+    #Predict(dim=2048, outputdim=21000, maxlen=128)
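
As a quick sanity check of the classes above, a minimal sketch of one forward pass, assuming a single device, default float32, and toy sizes (none of this appears in the committed file):

    import torch

    model = SanokaModel(dim=256, mul=2, Top=True)  # Top=True adds the 128->dim input projection
    head = OutputLayer(256, worddim=1000)
    model.reset()                                  # clear all six recurrent states
    x = torch.randn(4, 10, 128)                    # (batch, sequence, word2vec dim)
    for t in range(x.shape[1]):
        h = model(x[:, t])                         # one stateful step per token
    logits = head(h)                               # (4, 1000) next-token scores
    print(logits.shape)

Because SanokaLayer keeps its state in self.x, reset() must be called before each new sequence, which is exactly what the training loops do at the start of every step.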
3BSanokaKai2/LLM1.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:012d20b8fc0d8af6f4db67bafb2a28ca59a2ee56423eac2e601a1697beffe298
+size 604773006
3BSanokaKai2/LLM2.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ee5932033baa47419e20a437951b9afe371d083837e4f687ad8222116b41936
+size 604244122
3BSanokaKai2/LLM3.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57b7e2c4dd0e3fa6a05bdf75bdb1d9668586e9bc9cc542e76475649f699cb480
+size 604244122
3BSanokaKai2/LLM4.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b064b7d5fdd79e6e78a754c3382806b5c52eea990ad6df864a056b79c512fb2e
+size 604244122
3BSanokaKai2/LLM5.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f448ad6bd54de6dbc6210a1846358ff7ed77a3741d51f4d2d277b0b9c55879a1
+size 604244122
3BSanokaKai2/LLM6.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bb731e1a1df5a2e205df89e1ceb5d856a81e871d88faf1ebb3ec05bb3880be7
+size 604244122
3BSanokaKai2/licence.txt ADDED
@@ -0,0 +1,7 @@
+Copyright (c) <2024> <Apfel X:@KyoumeiProject>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do the same.
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+The Software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the Software or the use or other dealings in the Software.
3BSanokaKai2/output.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d62f04138b99cc0735f102e2179672be006c45a37f4b69342f3389b939fba28
+size 86059474
3BSanokaKai2/readme.txt ADDED
@@ -0,0 +1,38 @@
+AI-Large.py is the training code.
+Since no fine-tuned weights are included,
+a fine-tuning function is provided instead.
+
+Warning: for training (including fine-tuning), a machine with at least 128 GB of main memory is recommended; with less, it may blue-screen.
+Note: the code is set up to use seven GPUs. To change this, find the "cuda:n" strings and put in the GPU number you want, or "cpu".
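
If you have fewer than seven GPUs, one low-effort remap, offered here as an assumption rather than something the author tested, is to point every stage at the same device near the top of PreTrain/Fineturning/Predict/ValidationLoss:

    # hypothetical single-device remap; "cpu" also works, slowly
    device1 = device2 = device3 = device4 = device5 = device6 = device7 = torch.device("cuda:0")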
+
+Libraries used
+
+
+import MeCab
+import unidic
+import torch
+import copy
+import time
+import matplotlib.pyplot as plt
+import re
+import math
+import numpy as np
+from gensim.models import Word2Vec
+import pickle
+import threading
+import sentencepiece
+
+# How to fine-tune
+First, prepare a file called "train_data.txt"
+and put your fine-tuning data in it.
+Each newline-separated line of train_data.txt is treated as a separate sequence.
+Once train_data.txt is ready, run AI-Large.py.
+When it runs, the DataMake() function vectorizes the training data.
+Next, Fineturning() is executed.
+This performs the training.
+Once training starts, sample outputs are printed; stop with Ctrl+C once they read as more or less coherent Japanese.
+For a first run, stopping around the point where epoch 50 is displayed is recommended.
+This produces fineturning.pth.
+Finally, comment out Fineturning() and DataMake() and run Predict() to use the model.
+A "Q>" prompt appears; type your input there.
+The answer should then appear next to "A>". (If fine-tuning is insufficient, nothing may be printed.)
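
Put together, the steps above amount to editing the __main__ block at the bottom of AI-Large.py roughly as follows (parameter values are the ones already present in its commented-out calls):

    if __name__ == "__main__":
        # step 1: vectorize train_data.txt into Train_Data.bin, then comment this back out
        DataMake()
        # step 2: fine-tune the output layer; Ctrl+C around epoch 50 leaves fineturning.pth on disk
        #Fineturning(Load=False, dim=2048, outputdim=21000, lr=1e-03, onestep=300, uselen=128)
        # step 3: with the two calls above commented out, chat with the model
        #Predict(dim=2048, outputdim=21000, maxlen=128)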
3BSanokaKai2/table.txt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90a6e7244d7c9f6d7baaac0fde820a1eb41724e6a0f9fdd793f00e5c02b62059
+size 27069230
3BSanokaKai2/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e0594d183dc437f0b24fd52db43c8ef068d39c0f5bdec0cc1fd5b867214675f
+size 577009
3BSanokaKai2/tokenizer.vocab ADDED
The diff for this file is too large to render. See raw diff
 
3BSanokaKai2/word2vec.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b70326897d6913da9aa1fc2e837e7531458359740bae17580c8c9d82a7782efe
+size 21157728