"""
Created on Thu Mar 21 10:34:46 2024

@author: takan
"""

import MeCab
import torch
import time
import numpy as np
from gensim.models import Word2Vec
import pickle
import sentencepiece as spm


class DenseBlock(torch.nn.Module):
    """Two-layer feed-forward block with an ELU nonlinearity in between."""

    def __init__(self, dim, mul=1):
        super().__init__()
        self.I = torch.nn.Linear(dim, dim * mul)
        self.O = torch.nn.Linear(dim * mul, dim)

    def forward(self, x):
        x = self.I(x)
        x = torch.nn.functional.elu(x)
        x = self.O(x)
        return x


class AttentionBlock(torch.nn.Module):
    """Element-wise attention: softmax(Q * K) gates V (not scaled dot-product attention)."""

    def __init__(self, dim, mul=1):
        super().__init__()
        self.Q = torch.nn.Linear(dim, dim * mul)
        self.K = torch.nn.Linear(dim, dim * mul)
        self.V = torch.nn.Linear(dim, dim * mul)
        self.O = torch.nn.Linear(dim * mul, dim)

    def forward(self, q, k, v):
        q = self.Q(q)
        k = self.K(k)
        v = self.V(v)
        x = torch.nn.functional.softmax(q * k, dim=-1) * v
        x = self.O(x)
        return x

""" |
|
class AttentionBlock(torch.nn.Module): |
|
def __init__(self, dim, mul=1): |
|
super().__init__() |
|
self.attn = torch.nn.MultiheadAttention(dim, 16, batch_first=True) |
|
def forward(self, q,k,v): |
|
x = self.attn(q, k, v)[0] |
|
return x |
|
""" |
|
class SanokaLayer(torch.nn.Module):
    """Stateful layer: keeps a running state self.x that each new input attends against."""

    def __init__(self, dim, mul=1):
        super().__init__()
        self.x = None
        self.A = AttentionBlock(dim, mul)
        self.B = DenseBlock(dim, mul)

    def reset(self, x=None):
        self.x = x

    def forward(self, u):
        if self.x is not None:
            uu = torch.nn.functional.normalize(u)
            xx = torch.nn.functional.normalize(self.x)
            x = self.A(uu, xx, xx)
            y = self.B(torch.nn.functional.normalize(x)) + u
            self.x = x + self.x
            return y
        else:
            # First step of a sequence: no state yet, so attend over the input itself.
            uu = torch.nn.functional.normalize(u)
            x = self.A(uu, uu, uu)
            y = self.B(torch.nn.functional.normalize(x)) + u
            self.x = x
            return y

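# Calling convention used throughout this file (a minimal sketch, not executed here):
#   layer = SanokaLayer(dim)
#   layer.reset()                # clear the recurrent state before each sequence
#   for t in range(seq_len):
#       y = layer(x[:, t])       # one call per time step; state accumulates in layer.x
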
class SanokaModel(torch.nn.Module):
    """Stack of six SanokaLayer blocks; when Top=True an input projection (128 -> dim) is applied first."""

    def __init__(self, dim, mul=1, Top=True):
        super().__init__()
        self.Top = Top
        if Top:
            self.I = torch.nn.Linear(128, dim)
        self.A = SanokaLayer(dim, mul)
        self.B = SanokaLayer(dim, mul)
        self.C = SanokaLayer(dim, mul)
        self.D = SanokaLayer(dim, mul)
        self.E = SanokaLayer(dim, mul)
        self.F = SanokaLayer(dim, mul)

    def reset(self):
        self.A.reset()
        self.B.reset()
        self.C.reset()
        self.D.reset()
        self.E.reset()
        self.F.reset()

    def forward(self, x):
        if self.Top:
            x = self.I(x)
        x = self.A(x)
        x = self.B(x)
        x = self.C(x)
        x = self.D(x)
        x = self.E(x)
        x = self.F(x)
        return x


class OutputLayer(torch.nn.Module):
    """Final projection from the hidden dimension to vocabulary logits (heads is unused)."""

    def __init__(self, hiddendim, worddim=59000, heads=4):
        super().__init__()
        self.H = torch.nn.Linear(hiddendim, worddim)

    def forward(self, inpute):
        x = inpute
        x = self.H(x)
        return x


def GOILOAD():
    """Load the vocabulary exported by W2VMake to table.txt (word2vec text format, header on the first line).

    Returns a {word: id} dict and the corresponding id -> word list.
    """
    fuf = open("table.txt", "r", encoding="UTF-8")
    goi = fuf.read().split("\n")
    fuf.close()
    chardim = len(goi[1:])
    charid = {goi[i + 1].split()[0]: i for i in range(chardim - 1)}
    return charid, [goi[ia + 1].split()[0] for ia in range(chardim - 1)]


# Module-level dataset buffers; PreTrain / Fineturning fill datas / trues / lens from Train_Data.bin.
datas = []
trues = []
lens = []
dones = 0

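# On-disk artifacts used by the pipeline below:
#   train_data.txt   - raw corpus, one example per line (input to SPMake / W2VMake / DataMake)
#   tokenizer.model  - SentencePiece model written by SPMake
#   word2vec.model, table.txt - embeddings and exported vocabulary written by W2VMake
#   Train_Data.bin   - pickled (inputs, targets, lengths) written by Convert
#   LLM1.pth ... LLM6.pth, output.pth, fineturning.pth - checkpoints written during training
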
def Convert(buns, table, maxlen=256):
    """Tokenize each line with SentencePiece, pair Word2Vec vectors (inputs) with vocabulary ids (targets), and pickle the result to Train_Data.bin."""
    buns = buns.split("\n")
    sp = spm.SentencePieceProcessor()
    sp.Load("tokenizer.model")
    w2v = Word2Vec.load("word2vec.model")
    data = []
    true = []
    lena = []
    for datac in range(len(buns)):
        try:
            buna = sp.EncodeAsPieces(buns[datac])[:maxlen]
            a = torch.from_numpy(w2v.wv[buna])
            b = torch.tensor([table[buna[ii]] for ii in range(len(buna))])
            c = len(buna)
        except Exception:
            # Skip lines with tokens missing from the Word2Vec vocabulary or table.
            print("ERROR")
        else:
            data.append(a)
            true.append(b)
            lena.append(c)
        print(datac)
    f = open("Train_Data.bin", "wb")
    pickle.dump((data, true, lena), f)
    f.close()
    return


def SPMake():
    """Train the SentencePiece tokenizer (tokenizer.model) on train_data.txt."""
    spm.SentencePieceTrainer.Train(
        "--input=train_data.txt --model_prefix=tokenizer --vocab_size=20000 --train_extremely_large_corpus=True"
    )


def W2VMake(filepath="train_data.txt", mincount=50, worker=60):
    """Train Word2Vec embeddings over the SentencePiece-tokenized corpus and export the vocabulary to table.txt."""
    sp = spm.SentencePieceProcessor()
    sp.Load("tokenizer.model")
    f = open(filepath, mode="r", encoding="UTF-8")
    texts = f.read().split("\n")
    f.close()
    dat = []
    print(len(texts))
    for a in range(len(texts)):
        dat.append(sp.EncodeAsPieces(texts[a]))
        print(a)

    model = Word2Vec(sentences=dat, vector_size=128, window=100, min_count=mincount, workers=worker)
    model.save("word2vec.model")
    model.wv.save_word2vec_format('table.txt')


def DataMake(filepath="train_data.txt", maxlen=129):
    """Build the pickled training set from the raw text file (maxlen is currently unused; Convert applies its own cap)."""
    table, i2w = GOILOAD()
    print(len(table))
    time.sleep(1)
    f = open(filepath, mode="r", encoding="UTF-8")
    txt = f.read()
    f.close()
    Convert(txt, table)
    return None


def PreTrain(Load=False, dim=512, outputdim=40000, lr=1e-04, epoch=10, epochload=1000, usedata=480000, onestep=100, uselen=64):
    """Pre-train the six-model pipeline (one SanokaModel per GPU) plus the output layer with next-token cross-entropy."""
    global datas
    global trues
    global lens
    torch.manual_seed(1293431)

    device1 = torch.device("cuda:0")
    device2 = torch.device("cuda:1")
    device3 = torch.device("cuda:2")
    device4 = torch.device("cuda:3")
    device5 = torch.device("cuda:4")
    device6 = torch.device("cuda:5")
    device7 = torch.device("cuda:6")
    lossf = torch.nn.CrossEntropyLoss()
    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)

    if Load:
        model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
        model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
        model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
        model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
        model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
        model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
        output.load_state_dict(torch.load("output.pth", map_location=device7))
    model1Optim = torch.optim.Adam(model1.parameters(), lr=lr)
    model2Optim = torch.optim.Adam(model2.parameters(), lr=lr)
    model3Optim = torch.optim.Adam(model3.parameters(), lr=lr)
    model4Optim = torch.optim.Adam(model4.parameters(), lr=lr)
    model5Optim = torch.optim.Adam(model5.parameters(), lr=lr)
    model6Optim = torch.optim.Adam(model6.parameters(), lr=lr)
    outputO = torch.optim.Adam(output.parameters(), lr=lr)
    f = open("Train_Data.bin", "rb")
    datas, trues, lens = pickle.load(f)
    f.close()
    # Padded batch buffers: inputs are 128-dim Word2Vec vectors, targets are padded with id outputdim - 1.
    train_x = torch.zeros((epochload, uselen, 128)).to(torch.bfloat16).to(device1)
    train_y = torch.full((epochload, uselen), outputdim - 1, dtype=torch.long).to(device7)
    table, i2w = GOILOAD()
    base = 0
    epoch = int(np.floor((len(datas) / epochload) * epoch))
    print("データ量", len(datas))  # dataset size
    for epochs in range(epoch):
        train_x = train_x.detach()
        train_y = train_y.detach()
        # Slide the window over the dataset, wrapping at the end (and at usedata).
        if base < len(datas) - epochload * 2:
            base += epochload
        else:
            base = 0
        if base > usedata:
            base = 0
        for b in range(epochload):
            a = b + base
            train_x[b, :datas[a].shape[0]] = datas[a].to(torch.bfloat16).to(device1)[:uselen]
            train_y[b, :trues[a].shape[0]] = trues[a].to(device7).to(torch.long)[:uselen]
        epls = 0.00
        timem = time.time()
        for steps in range(epochload // onestep):
            model1.reset()
            model2.reset()
            model3.reset()
            model4.reset()
            model5.reset()
            model6.reset()
            oa = ""
            model1Optim.zero_grad()
            model2Optim.zero_grad()
            model3Optim.zero_grad()
            model4Optim.zero_grad()
            model5Optim.zero_grad()
            model6Optim.zero_grad()
            outputO.zero_grad()
            loss = 0.00
            for b in range(uselen - 1):
                # One time step: pass the minibatch through the device pipeline and predict token b + 1.
                out = model1(train_x[steps * onestep:steps * onestep + onestep, b])
                out = model2(out.to(device2))
                out = model3(out.to(device3))
                out = model4(out.to(device4))
                out = model5(out.to(device5))
                out = model6(out.to(device6))
                out = output(out.to(device7))
                loss += lossf(out, train_y[steps * onestep:steps * onestep + onestep, b + 1])

                # Greedy-decode the first sample in the batch for a progress preview.
                sfo = torch.nn.functional.softmax(out[0], dim=-1)
                wid = torch.argmax(sfo, dim=-1).item()
                try:
                    wd = i2w[wid]
                except IndexError:
                    oa = oa + "ERROR"
                else:
                    oa = oa + wd

            epls += loss.item()
            loss.backward()

            model1Optim.step()
            model2Optim.step()
            model3Optim.step()
            model4Optim.step()
            model5Optim.step()
            model6Optim.step()
            outputO.step()
        print("出力サンプル> ", oa[:32].replace("?", ""))  # sample output
        print("epoch", epochs, "Train_epoch_sum_loss", epls, "time", time.time() - timem)
        if epochs % 10 == 9:
            torch.save(model1.state_dict(), "LLM1.pth")
            torch.save(model2.state_dict(), "LLM2.pth")
            torch.save(model3.state_dict(), "LLM3.pth")
            torch.save(model4.state_dict(), "LLM4.pth")
            torch.save(model5.state_dict(), "LLM5.pth")
            torch.save(model6.state_dict(), "LLM6.pth")
            torch.save(output.state_dict(), "output.pth")


def Fineturning(Load=False, dim=512, outputdim=40000, lr=1e-04, epoch=10000, epochload=1000, onestep=200, uselen=32):
    """Fine-tune on Train_Data.bin: the six backbone models run under no_grad and only the output layer is updated."""
    global datas
    global trues
    global lens
    torch.manual_seed(1293431)

    device1 = torch.device("cuda:0")
    device2 = torch.device("cuda:1")
    device3 = torch.device("cuda:2")
    device4 = torch.device("cuda:3")
    device5 = torch.device("cuda:4")
    device6 = torch.device("cuda:5")
    device7 = torch.device("cuda:6")
    lossf = torch.nn.CrossEntropyLoss()
    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)

    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
    output.load_state_dict(torch.load("output.pth", map_location=device7))
    # Note: only outputO is stepped below; the backbone optimizers are created but never stepped.
    model1Optim = torch.optim.Adam(model1.parameters(), lr=lr)
    model2Optim = torch.optim.Adam(model2.parameters(), lr=lr)
    model3Optim = torch.optim.Adam(model3.parameters(), lr=lr)
    model4Optim = torch.optim.Adam(model4.parameters(), lr=lr)
    model5Optim = torch.optim.Adam(model5.parameters(), lr=lr)
    model6Optim = torch.optim.Adam(model6.parameters(), lr=lr/500)
    outputO = torch.optim.Adam(output.parameters(), lr=lr)
    f = open("Train_Data.bin", "rb")
    datas, trues, lens = pickle.load(f)
    f.close()
    train_x = torch.zeros((epochload, uselen, 128)).to(torch.bfloat16).to(device1)
    train_y = torch.full((epochload, uselen), outputdim - 1, dtype=torch.long).to(device7)
    table, i2w = GOILOAD()
    base = 0
    epoch = int(np.floor((len(datas) / epochload) * epoch))

    for epochs in range(epoch):
        train_x = train_x.detach()
        train_y = train_y.detach()
        if base < len(datas) - epochload * 2:
            base += epochload
        else:
            base = 0
        for b in range(epochload):
            a = b + base
            train_x[b, :datas[a].shape[0]] = datas[a].to(torch.bfloat16).to(device1)[:uselen]
            train_y[b, :trues[a].shape[0]] = trues[a].to(device7).to(torch.long)[:uselen]
        epls = 0.00
        timem = time.time()
        for steps in range(epochload // onestep):
            model1.reset()
            model2.reset()
            model3.reset()
            model4.reset()
            model5.reset()
            model6.reset()
            oa = ""
            loss = 0.00
            model1Optim.zero_grad()
            model2Optim.zero_grad()
            model3Optim.zero_grad()
            model4Optim.zero_grad()
            model5Optim.zero_grad()
            model6Optim.zero_grad()
            outputO.zero_grad()
            for b in range(uselen - 1):
                with torch.no_grad():
                    # Backbone runs without gradients; only the output layer is trained.
                    out = model1(train_x[steps * onestep:steps * onestep + onestep, b])
                    out = model2(out.to(device2))
                    out = model3(out.to(device3))
                    out = model4(out.to(device4))
                    out = model5(out.to(device5))
                    out = model6(out.to(device6))
                out = output(out.to(device7))
                loss += lossf(out, train_y[steps * onestep:steps * onestep + onestep, b + 1])

                sfo = torch.nn.functional.softmax(out[0], dim=-1)
                wid = torch.argmax(sfo, dim=-1).item()
                try:
                    wd = i2w[wid]
                except IndexError:
                    oa = oa + "ERROR"
                else:
                    oa = oa + wd
            epls += loss.item()
            loss.backward()

            outputO.step()
        print("出力サンプル> ", oa[:32].replace("?", ""))  # sample output
        print("epoch", epochs, "Train_epoch_sum_loss", epls, "time", time.time() - timem)
        if epochs % 10 == 9:
            torch.save(output.state_dict(), "fineturning.pth")


def Predict(dim=512, outputdim=40000, maxlen=32):
    """Interactive greedy generation: encode the prompt with SentencePiece/Word2Vec, then feed back each predicted token's Word2Vec vector."""
    torch.manual_seed(1293431)

    table, i2w = GOILOAD()
    sp = spm.SentencePieceProcessor()
    sp.Load("tokenizer.model")

    w2v = Word2Vec.load("word2vec.model")

    device1 = torch.device("cuda:0")
    device2 = torch.device("cuda:1")
    device3 = torch.device("cuda:2")
    device4 = torch.device("cuda:3")
    device5 = torch.device("cuda:4")
    device6 = torch.device("cuda:5")
    device7 = torch.device("cuda:6")
    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)

    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
    output.load_state_dict(torch.load("fineturning.pth", map_location=device7))

    while True:
        dd = input("Q> ")

        data = []
        buna = sp.EncodeAsPieces(dd)
        print(buna)
        for a in range(len(buna)):
            try:
                data.append(torch.from_numpy(w2v.wv[buna[a]]).view(1, 1, 128).to(device1))
            except KeyError:
                print("Not Found")
        dat = torch.cat(data, dim=1).to(device1)
        oa = ""
        with torch.no_grad():
            model1.reset()
            model2.reset()
            model3.reset()
            model4.reset()
            model5.reset()
            model6.reset()
            oa = ""
            # Feed the prompt tokens (all but the last) to build up the recurrent state.
            for a in range(dat.shape[1] - 1):
                out = model1(dat[:, a].to(torch.bfloat16))
                out = model2(out.to(device2))
                out = model3(out.to(device3))
                out = model4(out.to(device4))
                out = model5(out.to(device5))
                out = model6(out.to(device6))
                out = output(out.to(device7))
            # Greedy decoding: append the predicted token's Word2Vec vector and continue.
            for b in range(maxlen - dat.shape[1]):
                out = model1(dat[:, -1].to(torch.bfloat16))
                out = model2(out.to(device2))
                out = model3(out.to(device3))
                out = model4(out.to(device4))
                out = model5(out.to(device5))
                out = model6(out.to(device6))
                out = output(out.to(device7))
                sfo = torch.nn.functional.softmax(out, dim=-1)
                wid = torch.argmax(sfo, dim=-1).item()
                if wid != outputdim - 1:
                    try:
                        wd = i2w[wid]
                    except IndexError:
                        oa = oa + "ERROR"
                    else:
                        oa = oa + wd
                        dat = torch.cat([dat, torch.from_numpy(w2v.wv[wd]).to(device1).view(1, 1, 128)], dim=1)
        print("A> ", oa.replace("?", ""))


def ValidationLoss(dim=512, outputdim=40000, maxlen=32):
    """Compute next-token cross-entropy on one MeCab-tokenized input sentence and print a greedy preview."""
    torch.manual_seed(1293431)

    table, i2w = GOILOAD()
    tagger = MeCab.Tagger("-Owakati")
    w2v = Word2Vec.load("word2vec.model")
    device1 = torch.device("cuda:0")
    device2 = torch.device("cuda:1")
    device3 = torch.device("cuda:2")
    device4 = torch.device("cuda:3")
    device5 = torch.device("cuda:4")
    device6 = torch.device("cuda:5")
    device7 = torch.device("cuda:6")
    lossf = torch.nn.CrossEntropyLoss()
    model1 = SanokaModel(dim, 2, True).to(torch.bfloat16).to(device1)
    model2 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device2)
    model3 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device3)
    model4 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device4)
    model5 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device5)
    model6 = SanokaModel(dim, 2, False).to(torch.bfloat16).to(device6)
    output = OutputLayer(dim, outputdim).to(torch.bfloat16).to(device7)

    model1.load_state_dict(torch.load("LLM1.pth", map_location=device1))
    model2.load_state_dict(torch.load("LLM2.pth", map_location=device2))
    model3.load_state_dict(torch.load("LLM3.pth", map_location=device3))
    model4.load_state_dict(torch.load("LLM4.pth", map_location=device4))
    model5.load_state_dict(torch.load("LLM5.pth", map_location=device5))
    model6.load_state_dict(torch.load("LLM6.pth", map_location=device6))
    output.load_state_dict(torch.load("output.pth", map_location=device7))

    dd = input("TestData> ")
    data = []
    # Note: validation tokenizes with MeCab (wakati) rather than SentencePiece; it assumes
    # every token is in the vocabulary, otherwise trued and dat would fall out of alignment.
    buna = tagger.parse(dd).split()
    trued = torch.tensor([table[dfg] for dfg in buna]).to(torch.long).unsqueeze(dim=0)
    print(buna)
    print(trued)
    for a in range(len(buna)):
        try:
            data.append(torch.from_numpy(w2v.wv[buna[a]]).view(1, 1, 128).to(device1))
        except KeyError:
            print("Not Found")
    dat = torch.cat(data, dim=1).to(device1)
    oa = ""
    loss = 0.00
    with torch.no_grad():
        model1.reset()
        model2.reset()
        model3.reset()
        model4.reset()
        model5.reset()
        model6.reset()
        oa = ""
        for a in range(dat.shape[1] - 1):
            out = model1(dat[:, a].to(torch.bfloat16))
            out = model2(out.to(device2))
            out = model3(out.to(device3))
            out = model4(out.to(device4))
            out = model5(out.to(device5))
            out = model6(out.to(device6))
            out = output(out.to(device7))
            sfo = torch.nn.functional.softmax(out, dim=-1)
            wid = torch.argmax(sfo, dim=-1).item()
            try:
                wd = i2w[wid]
            except IndexError:
                oa = oa + "ERROR"
            else:
                oa = oa + wd
            # Targets must live on the same device as the logits (device7).
            loss += lossf(out, trued[:, a + 1].to(device7))
    print("validationloss", loss.item() / dat.shape[1], "preview", oa)


if __name__ == "__main__":
    # The original script leaves this block empty; which step to run is not specified.
    # A typical order, given the functions above, would be (uncomment as needed):
    #   SPMake()       # train the SentencePiece tokenizer
    #   W2VMake()      # train Word2Vec embeddings and export table.txt
    #   DataMake()     # build Train_Data.bin
    #   PreTrain()     # pre-train the pipeline
    #   Fineturning()  # fine-tune the output layer
    #   Predict()      # interactive generation
    pass