File size: 1,502 Bytes
bc4f3f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
from onnx_modules.V220_OnnxInference import OnnxInferenceSession
import numpy as np
# Locations of the exported ONNX sub-models, keyed by the names that are
# passed to OnnxInferenceSession. Entry order matches the original script.
_model_paths = {
    "enc": "onnx/BertVits2.2PT/BertVits2.2PT_enc_p.onnx",
    "emb_g": "onnx/BertVits2.2PT/BertVits2.2PT_emb.onnx",
    "dp": "onnx/BertVits2.2PT/BertVits2.2PT_dp.onnx",
    "sdp": "onnx/BertVits2.2PT/BertVits2.2PT_sdp.onnx",
    "flow": "onnx/BertVits2.2PT/BertVits2.2PT_flow.onnx",
    "dec": "onnx/BertVits2.2PT/BertVits2.2PT_dec.onnx",
}

# Create the inference session, requesting only the CPUExecutionProvider.
Session = OnnxInferenceSession(_model_paths, Providers=["CPUExecutionProvider"])
# The inputs here are the same as in the original version: take the results of
# the original preprocessing and simply append .numpy() to them.

# Non-zero ids of the sample sequence. The array given to the session carries
# a 0 before, between and after these ids (presumably a blank id inserted by
# the original preprocessing -- TODO confirm against that code).
_token_ids = [97, 8, 78, 8, 76, 37, 40, 97, 8, 23, 8, 74, 26, 104]
x = np.array([0] + [value for token in _token_ids for value in (token, 0)])

# All-zero arrays with the same shape and dtype as x.
tone = np.zeros(x.shape, dtype=x.dtype)
language = np.zeros(x.shape, dtype=x.dtype)

# One-element id array holding 0 (a speaker id, judging by the name -- confirm).
sid = np.array([0])

# Feature inputs are filled with standard-normal random values; no real
# features are computed in this script. Each has one 1024-wide row per entry
# of x, and they are drawn in the order bert, ja_bert, en_bert.
bert, ja_bert, en_bert = (
    np.random.randn(x.shape[0], 1024) for _ in range(3)
)

# Random (512, 1) array passed as the `emo` argument.
emo = np.random.randn(512, 1)
# Call the session with the sample inputs (all positional, in the same order
# as before) and print whatever it returns.
audio = Session(x, tone, language, bert, ja_bert, en_bert, emo, sid)
print(audio)
|