ApfelSchorle committed
Commit
9e92d30
1 Parent(s): 844e982

upload All

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+3BSanokaKai2/table.txt filter=lfs diff=lfs merge=lfs -text
3BSanokaKai2/AI-Large.py ADDED
@@ -0,0 +1,708 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar 21 10:34:46 2024
+
+@author: takan
+"""
+
+import MeCab
+import torch
+import copy
+import time
+import matplotlib.pyplot as plt
+import re
+import math
+import numpy as np
+from gensim.models import Word2Vec
+import pickle
+import threading
+import sentencepiece as spm
+
+class DenseBlock(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.I = torch.nn.Linear(dim, dim*mul)
+        self.O = torch.nn.Linear(dim*mul, dim)
+    def forward(self, x):
+        x = self.I(x)
+        x = torch.nn.functional.elu(x)
+        x = self.O(x)
+        return x
+
+class AttentionBlock(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.Q = torch.nn.Linear(dim, dim*mul)
+        self.K = torch.nn.Linear(dim, dim*mul)
+        self.V = torch.nn.Linear(dim, dim*mul)
+        self.O = torch.nn.Linear(dim*mul, dim)
+    def forward(self, q, k, v):
+        # Element-wise "attention": softmax over the feature axis of q*k, no QK^T matrix.
+        q = self.Q(q)
+        k = self.K(k)
+        v = self.V(v)
+        x = torch.nn.functional.softmax(q * k, dim=-1) * v
+        x = self.O(x)
+        return x
+"""
+class AttentionBlock(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.attn = torch.nn.MultiheadAttention(dim, 16, batch_first=True)
+    def forward(self, q, k, v):
+        x = self.attn(q, k, v)[0]
+        return x
+"""
+class SanokaLayer(torch.nn.Module):
+    def __init__(self, dim, mul=1):
+        super().__init__()
+        self.x = None  # recurrent state, accumulated across forward() calls
+        self.A = AttentionBlock(dim, mul)
+        self.B = DenseBlock(dim, mul)
+    def reset(self, x=None):
+        self.x = x
+    def forward(self, u):
+        if (self.x is not None):
+            uu = torch.nn.functional.normalize(u)
+            xx = torch.nn.functional.normalize(self.x)
+            x = self.A(uu, xx, xx)
+            y = self.B(torch.nn.functional.normalize(x)) + u
+            self.x = x + self.x
+            return y
+        else:
+            uu = torch.nn.functional.normalize(u)
+            x = self.A(uu, uu, uu)
+            y = self.B(torch.nn.functional.normalize(x)) + u
+            self.x = x
+            return y
+
+class SanokaModel(torch.nn.Module):
+    def __init__(self, dim, mul=1, Top=True):
+        super().__init__()
+        self.Top = Top
+        if (Top):
+            self.I = torch.nn.Linear(128, dim)  # projects 128-dim word2vec inputs
+        self.A = SanokaLayer(dim, mul)
+        self.B = SanokaLayer(dim, mul)
+        self.C = SanokaLayer(dim, mul)
+        self.D = SanokaLayer(dim, mul)
+        self.E = SanokaLayer(dim, mul)
+        self.F = SanokaLayer(dim, mul)
+    def reset(self):
+        self.A.reset()
+        self.B.reset()
+        self.C.reset()
+        self.D.reset()
+        self.E.reset()
+        self.F.reset()
+    def forward(self, x):
+        if (self.Top):
+            x = self.I(x)
+        x = self.A(x)
+        x = self.B(x)
+        x = self.C(x)
+        x = self.D(x)
+        x = self.E(x)
+        x = self.F(x)
+        return x
+
+class OutputLayer(torch.nn.Module):
+    def __init__(self, hiddendim, worddim=59000, heads=4):
+        super().__init__()
+        self.H = torch.nn.Linear(hiddendim, worddim)
+    def forward(self, inpute):
+        x = inpute
+        x = self.H(x)
+        return x
+
+def GOILOAD():
+    # Load the word2vec text table; line 0 is the "<count> <dim>" header.
+    fuf = open("table.txt", "r", encoding="UTF-8")
+    goi = fuf.read().split("\n")
+    fuf.close()
+    chardim = len(goi[1:])
+    charid = {goi[i+1].split()[0]: i for i in range(chardim-1)}
+    return charid, [goi[ia+1].split()[0] for ia in range(chardim-1)]
+
+datas = []
+trues = []
+lens = []
+dones = 0
+def Convert(buns, table, maxlen=256):
+    buns = buns.split("\n")
+    sp = spm.SentencePieceProcessor()
+    sp.Load("tokenizer.model")
+    w2v = Word2Vec.load("word2vec.model")
+    data = []
+    true = []
+    lena = []
+    for datac in range(len(buns)):
+        #print(datac)
+        #print(buns[datac])
+        try:
+            buna = sp.EncodeAsPieces(buns[datac])[:maxlen]
+            a = torch.from_numpy(w2v.wv[buna])
+            b = torch.tensor([table[buna[ii]] for ii in range(len(buna))])
+            c = len(buna)
+        except KeyError:
+            print("ERROR")
+        else:
+            data.append(a)
+            true.append(b)
+            lena.append(c)
+            print(datac)
+    f = open("Train_Data.bin", "wb")
+    pickle.dump((data, true, lena), f)
+    f.close()
+    return
+
+def SPMake():
+    spm.SentencePieceTrainer.Train("--input=train_data.txt --model_prefix=tokenizer --vocab_size=20000 --train_extremely_large_corpus=True")
+
+def W2VMake(filepath="train_data.txt", mincount=50, worker=60):
+    sp = spm.SentencePieceProcessor()
+    sp.Load("tokenizer.model")
+    f = open(filepath, mode="r", encoding="UTF-8")
+    texts = f.read().split("\n")
+    f.close()
+    dat = []
+    print(len(texts))
+    for a in range(len(texts)):
+        dat.append(sp.EncodeAsPieces(texts[a]))
+        print(a)
+    model = Word2Vec(sentences=dat, vector_size=128, window=100, min_count=mincount, workers=worker)
+    model.save("word2vec.model")
+    model.wv.save_word2vec_format('table.txt')
+
+def DataMake(filepath="train_data.txt", maxlen=129):
+    table, i2w = GOILOAD()
+    print(len(table))
+    time.sleep(1)
+    f = open(filepath, mode="r", encoding="UTF-8")
+    txt = f.read()
+    f.close()
+    Convert(txt, table)
+    return None
+
+def PreTrain(Load=False, dim=512, outputdim=40000, lr=1e-04, epoch=10, epochload=1000, usedata=480000, onestep=100, uselen=64):
+    global datas
+    global trues
+    global lens
+    torch.manual_seed(1293431)
+    #torch.manual_seed(576765)
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    # One six-layer stack per GPU, run as a pipeline; only the first has the 128->dim input layer.
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    if (Load):
+        model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+        model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+        model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+        model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+        model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+        model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+        output.load_state_dict(torch.load("output.pth", map_location=device7))
+    model1Optim = torch.optim.Adam(model1.parameters(), lr=lr)
+    model2Optim = torch.optim.Adam(model2.parameters(), lr=lr)
+    model3Optim = torch.optim.Adam(model3.parameters(), lr=lr)
+    model4Optim = torch.optim.Adam(model4.parameters(), lr=lr)
+    model5Optim = torch.optim.Adam(model5.parameters(), lr=lr)
+    model6Optim = torch.optim.Adam(model6.parameters(), lr=lr)
+    outputO = torch.optim.Adam(output.parameters(), lr=lr)
+    f = open("Train_Data.bin", "rb")
+    datas, trues, lens = pickle.load(f)
+    f.close()
+    train_x = torch.zeros((epochload, uselen, 128)).to(torch.bfloat16).to(device1)
+    train_y = torch.full((epochload, uselen), outputdim - 1, dtype=torch.long).to(device7)
+    table, i2w = GOILOAD()
+    base = 0
+    epoch = int(np.floor((len(datas) / epochload) * epoch))
+    print("data count", len(datas))
+    for epochs in range(epoch):
+        train_x = train_x.detach()
+        train_y = train_y.detach()
+        if (base < len(datas) - epochload*2):
+            base += epochload
+        else:
+            base = 0
+        if (base > usedata):
+            base = 0
+        for b in range(epochload):
+            a = b + base
+            leng = lens[a]
+            if (leng > uselen):
+                leng = uselen
+            # Clamp both sides to leng so sequences longer than uselen don't overflow the buffers.
+            train_x[b, :leng] = datas[a].to(torch.bfloat16).to(device1)[:uselen]
+            train_y[b, :leng] = trues[a].to(device7).to(torch.long)[:uselen]
+        epls = 0.00
+        timem = time.time()
+        for steps in range(epochload//onestep):
+            model1.reset()
+            model2.reset()
+            model3.reset()
+            model4.reset()
+            model5.reset()
+            model6.reset()
+            oa = ""
+            model1Optim.zero_grad()
+            model2Optim.zero_grad()
+            model3Optim.zero_grad()
+            model4Optim.zero_grad()
+            model5Optim.zero_grad()
+            model6Optim.zero_grad()
+            outputO.zero_grad()
+            loss = 0.00
+            for b in range(uselen-1):
+                out = model1(train_x[steps*onestep:steps*onestep+onestep, b])
+                out = model2(out.to(device2))
+                out = model3(out.to(device3))
+                out = model4(out.to(device4))
+                out = model5(out.to(device5))
+                out = model6(out.to(device6))
+                out = output(out.to(device7))
+                loss += lossf(out, train_y[steps*onestep:steps*onestep+onestep, b+1])
+                epls += loss.item()  # .item() keeps the logging sum off the autograd graph
+                sfo = torch.nn.functional.softmax(out[0], dim=-1)
+                wid = torch.argmax(sfo, dim=-1).item()
+                try:
+                    wd = i2w[wid]
+                except IndexError:
+                    oa = oa + "ERROR"
+                else:
+                    oa = oa + wd
+            loss.backward()
+            #print(b)
+            model1Optim.step()
+            model2Optim.step()
+            model3Optim.step()
+            model4Optim.step()
+            model5Optim.step()
+            model6Optim.step()
+            outputO.step()
+            print("sample output> ", oa[:32].replace("?", ""))
+        print("epoch", epochs, "Train_epoch_sum_loss", epls, "time", time.time() - timem)
+        if (epochs % 10 == 9):
+            torch.save(model1.state_dict(), "LLM1.pth")
+            torch.save(model2.state_dict(), "LLM2.pth")
+            torch.save(model3.state_dict(), "LLM3.pth")
+            torch.save(model4.state_dict(), "LLM4.pth")
+            torch.save(model5.state_dict(), "LLM5.pth")
+            torch.save(model6.state_dict(), "LLM6.pth")
+            torch.save(output.state_dict(), "output.pth")
+
+def Fineturning(Load=False, dim=512, outputdim=40000, lr=1e-04, epoch=10000, epochload=1000, onestep=200, uselen=32):
+    global datas
+    global trues
+    global lens
+    torch.manual_seed(1293431)
+    #torch.manual_seed(576765)
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+    output.load_state_dict(torch.load("output.pth", map_location=device7))
+    model1Optim = torch.optim.Adam(model1.parameters(), lr=lr)
+    model2Optim = torch.optim.Adam(model2.parameters(), lr=lr)
+    model3Optim = torch.optim.Adam(model3.parameters(), lr=lr)
+    model4Optim = torch.optim.Adam(model4.parameters(), lr=lr)
+    model5Optim = torch.optim.Adam(model5.parameters(), lr=lr)
+    model6Optim = torch.optim.Adam(model6.parameters(), lr=lr/500)
+    outputO = torch.optim.Adam(output.parameters(), lr=lr)
+    f = open("Train_Data.bin", "rb")
+    datas, trues, lens = pickle.load(f)
+    f.close()
+    train_x = torch.zeros((epochload, uselen, 128)).to(torch.bfloat16).to(device1)
+    train_y = torch.full((epochload, uselen), outputdim - 1, dtype=torch.long).to(device7)
+    table, i2w = GOILOAD()
+    base = 0
+    epoch = int(np.floor((len(datas) / epochload) * epoch))
+    #print(epoch)
+    for epochs in range(epoch):
+        train_x = train_x.detach()
+        train_y = train_y.detach()
+        if (base < len(datas) - epochload*2):
+            base += epochload
+        else:
+            base = 0
+        for b in range(epochload):
+            a = b + base
+            #print(a)
+            leng = lens[a]
+            if (leng > uselen):
+                leng = uselen
+            train_x[b, :leng] = datas[a].to(torch.bfloat16).to(device1)[:uselen]
+            train_y[b, :leng] = trues[a].to(device7).to(torch.long)[:uselen]
+        epls = 0.00
+        timem = time.time()
+        for steps in range(epochload//onestep):
+            model1.reset()
+            model2.reset()
+            model3.reset()
+            model4.reset()
+            model5.reset()
+            model6.reset()
+            oa = ""
+            loss = 0.00
+            model1Optim.zero_grad()
+            model2Optim.zero_grad()
+            model3Optim.zero_grad()
+            model4Optim.zero_grad()
+            model5Optim.zero_grad()
+            model6Optim.zero_grad()
+            outputO.zero_grad()
+            for b in range(uselen-1):
+                # The six recurrent stacks stay frozen; only the output layer is trained.
+                with torch.no_grad():
+                    out = model1(train_x[steps*onestep:steps*onestep+onestep, b])
+                    out = model2(out.to(device2))
+                    out = model3(out.to(device3))
+                    out = model4(out.to(device4))
+                    out = model5(out.to(device5))
+                    out = model6(out.to(device6))
+                out = output(out.to(device7))
+                loss += lossf(out, train_y[steps*onestep:steps*onestep+onestep, b+1])
+                epls += loss.item()
+                sfo = torch.nn.functional.softmax(out[0], dim=-1)
+                wid = torch.argmax(sfo, dim=-1).item()
+                try:
+                    wd = i2w[wid]
+                except IndexError:
+                    oa = oa + "ERROR"
+                else:
+                    oa = oa + wd
+            loss.backward()
+            #model6Optim.step()
+            outputO.step()
+            print("sample output> ", oa[:32].replace("?", ""))
+        print("epoch", epochs, "Train_epoch_sum_loss", epls, "time", time.time() - timem)
+        if (epochs % 10 == 9):
+            #torch.save(model6.state_dict(), "LLM6F.pth")
+            torch.save(output.state_dict(), "fineturning.pth")
+
+def Predict(dim=512, outputdim=40000, maxlen=32):
+    torch.manual_seed(1293431)
+    table, i2w = GOILOAD()
+    sp = spm.SentencePieceProcessor()
+    sp.Load("tokenizer.model")
+    w2v = Word2Vec.load("word2vec.model")
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+    output.load_state_dict(torch.load("fineturning.pth", map_location=device7))
+    while True:
+        dd = input("Q> ")# + ","
+        data = []
+        buna = sp.EncodeAsPieces(dd)
+        print(buna)
+        for a in range(len(buna)):
+            try:
+                data.append(torch.from_numpy(w2v.wv[buna[a]]).view(1, 1, 128).to(device1))
+            except KeyError:
+                print("Not Found")
+        dat = torch.cat(data, dim=1).to(device1)
+        oa = ""
+        with torch.no_grad():
+            model1.reset()
+            model2.reset()
+            model3.reset()
+            model4.reset()
+            model5.reset()
+            model6.reset()
+            oa = ""
+            # Feed the prompt to build up the recurrent state...
+            for a in range(dat.shape[1] - 1):
+                out = model1(dat[:, a].to(torch.bfloat16))
+                out = model2(out.to(device2))
+                out = model3(out.to(device3))
+                out = model4(out.to(device4))
+                out = model5(out.to(device5))
+                out = model6(out.to(device6))
+                out = output(out.to(device7))
+            # ...then generate greedily, token by token.
+            for b in range(maxlen - dat.shape[1]):
+                out = model1(dat[:, -1].to(torch.bfloat16))
+                out = model2(out.to(device2))
+                out = model3(out.to(device3))
+                out = model4(out.to(device4))
+                out = model5(out.to(device5))
+                out = model6(out.to(device6))
+                out = output(out.to(device7))
+                sfo = torch.nn.functional.softmax(out, dim=-1)
+                wid = torch.argmax(sfo, dim=-1).item()
+                if (wid != outputdim - 1):
+                    try:
+                        wd = i2w[wid]
+                    except IndexError:
+                        oa = oa + "ERROR"
+                    else:
+                        oa = oa + wd
+                        dat = torch.cat([dat, torch.from_numpy(w2v.wv[wd]).to(device1).view(1, 1, 128)], dim=1)
+        print("A> ", oa.replace("?", ""))
+
+def ValidationLoss(dim=512, outputdim=40000, maxlen=32):
+    torch.manual_seed(1293431)
+    table, i2w = GOILOAD()
+    tagger = MeCab.Tagger("-Owakati")
+    w2v = Word2Vec.load("word2vec.model")
+    device1 = torch.device("cuda:0")
+    device2 = torch.device("cuda:1")
+    device3 = torch.device("cuda:2")
+    device4 = torch.device("cuda:3")
+    device5 = torch.device("cuda:4")
+    device6 = torch.device("cuda:5")
+    device7 = torch.device("cuda:6")
+    lossf = torch.nn.CrossEntropyLoss()
+    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
+    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
+    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
+    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
+    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
+    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
+    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)
+    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
+    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
+    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
+    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
+    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
+    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
+    output.load_state_dict(torch.load("output.pth", map_location=device7))
+    dd = input("TestData> ")
+    data = []
+    buna = tagger.parse(dd).split()
+    trued = torch.tensor([table[dfg] for dfg in buna]).to(torch.long).unsqueeze(dim=0)
+    print(buna)
+    print(trued)
+    for a in range(len(buna)):
+        try:
+            data.append(torch.from_numpy(w2v.wv[buna[a]]).view(1, 1, 128).to(device1))
+        except KeyError:
+            print("Not Found")
+    dat = torch.cat(data, dim=1).to(device1)
+    oa = ""
+    loss = 0.00
+    with torch.no_grad():
+        model1.reset()
+        model2.reset()
+        model3.reset()
+        model4.reset()
+        model5.reset()
+        model6.reset()
+        oa = ""
+        for a in range(dat.shape[1] - 1):
+            out = model1(dat[:, a].to(torch.bfloat16))
+            out = model2(out.to(device2))
+            out = model3(out.to(device3))
+            out = model4(out.to(device4))
+            out = model5(out.to(device5))
+            out = model6(out.to(device6))
+            out = output(out.to(device7))
+            sfo = torch.nn.functional.softmax(out, dim=-1)
+            wid = torch.argmax(sfo, dim=-1).item()
+            try:
+                wd = i2w[wid]
+            except IndexError:
+                oa = oa + "ERROR"
+            else:
+                oa = oa + wd
+            loss += lossf(out, trued[:, a+1].to(device7))  # target on the same device as the logits
+    print("validationloss", loss.item() / dat.shape[1], "preview", oa)
+
+if __name__ == "__main__":
+    pass  # uncomment the step you need:
+    #DataMake()
+    #Fineturning(Load=False, dim=2048, outputdim=21000, lr=1e-03, onestep=300, uselen=128)
+    #Predict(dim=2048, outputdim=21000, maxlen=128)
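
As a quick sanity check of the classes above, a minimal sketch of one forward pass, assuming a single device, default float32, and toy sizes (none of this appears in the committed file):

    import torch

    model = SanokaModel(dim=256, mul=2, Top=True)  # Top=True adds the 128->dim input projection
    head = OutputLayer(256, worddim=1000)
    model.reset()                                  # clear all six recurrent states
    x = torch.randn(4, 10, 128)                    # (batch, sequence, word2vec dim)
    for t in range(x.shape[1]):
        h = model(x[:, t])                         # one stateful step per token
    logits = head(h)                               # (4, 1000) next-token scores
    print(logits.shape)

Because SanokaLayer keeps its state in self.x, reset() must be called before each new sequence, which is exactly what the training loops do at the start of every step.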
3BSanokaKai2/LLM1.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:012d20b8fc0d8af6f4db67bafb2a28ca59a2ee56423eac2e601a1697beffe298
+size 604773006
3BSanokaKai2/LLM2.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ee5932033baa47419e20a437951b9afe371d083837e4f687ad8222116b41936
+size 604244122
3BSanokaKai2/LLM3.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57b7e2c4dd0e3fa6a05bdf75bdb1d9668586e9bc9cc542e76475649f699cb480
+size 604244122
3BSanokaKai2/LLM4.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b064b7d5fdd79e6e78a754c3382806b5c52eea990ad6df864a056b79c512fb2e
+size 604244122
3BSanokaKai2/LLM5.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f448ad6bd54de6dbc6210a1846358ff7ed77a3741d51f4d2d277b0b9c55879a1
+size 604244122
3BSanokaKai2/LLM6.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bb731e1a1df5a2e205df89e1ceb5d856a81e871d88faf1ebb3ec05bb3880be7
+size 604244122
3BSanokaKai2/licence.txt ADDED
@@ -0,0 +1,7 @@
+Copyright (c) <2024> <Apfel X:@KyoumeiProject>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do the same.
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+The Software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the Software or the use or other dealings in the Software.
3BSanokaKai2/output.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d62f04138b99cc0735f102e2179672be006c45a37f4b69342f3389b939fba28
+size 86059474
3BSanokaKai2/readme.txt ADDED
@@ -0,0 +1,38 @@
+AI-Large.py is the training code.
+Since no fine-tuned weights are included,
+a fine-tuning function is provided instead.
+
+Warning: for training (including fine-tuning), a machine with at least 128 GB of main memory is recommended; with less, it may blue-screen.
+Note: the code is set up to use seven GPUs. To change this, find the "cuda:n" strings and put in the GPU number you want, or "cpu".
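
If you have fewer than seven GPUs, one low-effort remap, offered here as an assumption rather than something the author tested, is to point every stage at the same device near the top of PreTrain/Fineturning/Predict/ValidationLoss:

    # hypothetical single-device remap; "cpu" also works, slowly
    device1 = device2 = device3 = device4 = device5 = device6 = device7 = torch.device("cuda:0")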
+
+Libraries used
+
+
+import MeCab
+import unidic
+import torch
+import copy
+import time
+import matplotlib.pyplot as plt
+import re
+import math
+import numpy as np
+from gensim.models import Word2Vec
+import pickle
+import threading
+import sentencepiece
+
+# How to fine-tune
+First, prepare a file called "train_data.txt"
+and put your fine-tuning data in it.
+Each newline-separated line of train_data.txt is treated as a separate sequence.
+Once train_data.txt is ready, run AI-Large.py.
+When it runs, the DataMake() function vectorizes the training data.
+Next, Fineturning() is executed.
+This performs the training.
+Once training starts, sample outputs are printed; stop with Ctrl+C once they read as more or less coherent Japanese.
+For a first run, stopping around the point where epoch 50 is displayed is recommended.
+This produces fineturning.pth.
+Finally, comment out Fineturning() and DataMake() and run Predict() to use the model.
+A "Q>" prompt appears; type your input there.
+The answer should then appear next to "A>". (If fine-tuning is insufficient, nothing may be printed.)
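
Put together, the steps above amount to editing the __main__ block at the bottom of AI-Large.py roughly as follows (parameter values are the ones already present in its commented-out calls):

    if __name__ == "__main__":
        # step 1: vectorize train_data.txt into Train_Data.bin, then comment this back out
        DataMake()
        # step 2: fine-tune the output layer; Ctrl+C around epoch 50 leaves fineturning.pth on disk
        #Fineturning(Load=False, dim=2048, outputdim=21000, lr=1e-03, onestep=300, uselen=128)
        # step 3: with the two calls above commented out, chat with the model
        #Predict(dim=2048, outputdim=21000, maxlen=128)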
3BSanokaKai2/table.txt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90a6e7244d7c9f6d7baaac0fde820a1eb41724e6a0f9fdd793f00e5c02b62059
+size 27069230
3BSanokaKai2/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e0594d183dc437f0b24fd52db43c8ef068d39c0f5bdec0cc1fd5b867214675f
+size 577009
3BSanokaKai2/tokenizer.vocab ADDED
The diff for this file is too large to render. See raw diff
 
3BSanokaKai2/word2vec.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b70326897d6913da9aa1fc2e837e7531458359740bae17580c8c9d82a7782efe
+size 21157728