Artrajz committed on
Commit
31da78e
1 Parent(s): 01ea1e1
bert/ProsodyModel.py DELETED
@@ -1,75 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
-
6
- from transformers import BertModel, BertConfig, BertTokenizer
7
-
8
-
9
class CharEmbedding(nn.Module):
    """BERT encoder followed by a linear projection to 256 features.

    The tokenizer and the BERT configuration are both read from
    ``model_dir``. The BERT network itself is instantiated from that
    configuration only (no ``from_pretrained`` call is made for the model
    here), so its weights are whatever the caller loads afterwards.
    """

    def __init__(self, model_dir):
        """Create the tokenizer, the BERT encoder and the projection layers.

        Args:
            model_dir: Directory (or identifier) handed to
                ``BertTokenizer.from_pretrained`` and
                ``BertConfig.from_pretrained``.
        """
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained(model_dir)

        config = BertConfig.from_pretrained(model_dir)
        self.bert_config = config
        self.hidden_size = config.hidden_size

        self.bert = BertModel(config)
        self.proj = nn.Linear(config.hidden_size, 256)
        # Not used by ``forward`` in this class.
        # NOTE(review): presumably kept so checkpoint parameter names still
        # line up with the model -- confirm before removing.
        self.linear = nn.Linear(256, 3)

    def text2Token(self, text):
        """Tokenize ``text`` and return the vocabulary ids of its tokens."""
        pieces = self.tokenizer.tokenize(text)
        return self.tokenizer.convert_tokens_to_ids(pieces)

    def forward(self, inputs_ids, inputs_masks, tokens_type_ids):
        """Encode a batch with BERT and project it to 256 features.

        Args:
            inputs_ids: Passed to BERT as ``input_ids``.
            inputs_masks: Passed to BERT as ``attention_mask``.
            tokens_type_ids: Passed to BERT as ``token_type_ids``.

        Returns:
            ``self.proj`` applied to the first element of the BERT output.
        """
        bert_outputs = self.bert(
            input_ids=inputs_ids,
            attention_mask=inputs_masks,
            token_type_ids=tokens_type_ids,
        )
        # Only the first output element is consumed; the rest is discarded.
        sequence_features = bert_outputs[0]
        return self.proj(sequence_features)
30
-
31
-
32
class TTSProsody(object):
    """Inference wrapper that turns text into per-token prosody embeddings.

    Builds a ``CharEmbedding`` model from ``path``, loads the
    ``prosody_model.pt`` checkpoint found in the same directory, and
    exposes helpers to embed text and to stretch the per-token embeddings
    to phone resolution.
    """

    def __init__(self, path, device):
        """Load the model and move it to ``device`` in eval mode.

        Args:
            path: Directory holding the tokenizer/config files and
                ``prosody_model.pt``.
            device: Device the model and the input tensors are moved to.
        """
        self.device = device
        self.char_model = CharEmbedding(path)
        # NOTE(review): torch.load unpickles the file, which can execute
        # arbitrary code. Only point ``path`` at checkpoints you trust
        # (consider ``weights_only=True`` where the installed torch
        # version supports it).
        state_dict = torch.load(
            os.path.join(path, 'prosody_model.pt'),
            map_location="cpu"
        )
        # strict=False: missing or unexpected checkpoint keys are tolerated.
        self.char_model.load_state_dict(state_dict, strict=False)
        self.char_model.eval()
        self.char_model.to(self.device)

    def get_char_embeds(self, text):
        """Return the model output for ``text`` as a CPU tensor.

        The text is tokenized with the model's tokenizer and run as a
        single-item batch; the batch dimension is squeezed away, so the
        result has one row per token.
        """
        input_ids = self.char_model.text2Token(text)
        # Single unpadded sequence: attend to every token, segment id 0.
        input_masks = [1] * len(input_ids)
        type_ids = [0] * len(input_ids)
        input_ids = torch.LongTensor([input_ids]).to(self.device)
        input_masks = torch.LongTensor([input_masks]).to(self.device)
        type_ids = torch.LongTensor([type_ids]).to(self.device)

        with torch.no_grad():
            char_embeds = self.char_model(
                input_ids, input_masks, type_ids).squeeze(0).cpu()
        return char_embeds

    def expand_for_phone(self, char_embeds, length):  # length of phones for char
        """Repeat each token embedding once per phone of that token.

        Args:
            char_embeds: Tensor whose first dimension indexes tokens.
            length: Sequence with the number of phones for each token; must
                have exactly ``char_embeds.size(0)`` entries.

        Returns:
            A NumPy array with ``sum(length)`` rows.

        Raises:
            AssertionError: If ``length`` and ``char_embeds`` disagree on
                the number of tokens.
        """
        assert char_embeds.size(0) == len(length), (
            "got %d embeddings for %d phone counts"
            % (char_embeds.size(0), len(length))
        )
        expand_vecs = [
            vec.expand(leng, -1) for vec, leng in zip(char_embeds, length)
        ]
        if not expand_vecs:
            # torch.cat rejects an empty list; with no tokens the (empty)
            # input already has the right shape.
            return char_embeds.numpy()
        expand_embeds = torch.cat(expand_vecs, 0)
        assert expand_embeds.size(0) == sum(length), (
            "expanded %d rows, expected %d"
            % (expand_embeds.size(0), sum(length))
        )
        return expand_embeds.numpy()
68
-
69
-
70
if __name__ == "__main__":
    # Interactive smoke test: load the model from ./bert/ and embed every
    # line typed by the user. The embeddings themselves are discarded.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    prosody = TTSProsody('./bert/', device)
    while True:
        try:
            text = input("请输入文本:")
        except (EOFError, KeyboardInterrupt):
            # End of input (Ctrl-D) or Ctrl-C: leave the loop quietly
            # instead of terminating with a traceback.
            break
        prosody.get_char_embeds(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bert/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- """ from https://github.com/PlayVoice/vits_chinese """
2
- from .ProsodyModel import TTSProsody
 
 
 
bert/config.json DELETED
@@ -1,19 +0,0 @@
1
- {
2
- "attention_probs_dropout_prob": 0.1,
3
- "directionality": "bidi",
4
- "hidden_act": "gelu",
5
- "hidden_dropout_prob": 0.1,
6
- "hidden_size": 768,
7
- "initializer_range": 0.02,
8
- "intermediate_size": 3072,
9
- "max_position_embeddings": 512,
10
- "num_attention_heads": 12,
11
- "num_hidden_layers": 12,
12
- "pooler_fc_size": 768,
13
- "pooler_num_attention_heads": 12,
14
- "pooler_num_fc_layers": 3,
15
- "pooler_size_per_head": 128,
16
- "pooler_type": "first_token_transform",
17
- "type_vocab_size": 2,
18
- "vocab_size": 21128
19
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bert/prosody_model.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3accec7a0d5cbfccaa8a42b96374a91d442a69801c6a01402baae3bf06b8c015
3
- size 409941419
 
 
 
 
bert/prosody_tool.py DELETED
@@ -1,426 +0,0 @@
1
def is_chinese(uchar):
    """Return True if ``uchar`` lies between U+4E00 and U+9FA5, inclusive.

    The check is an ordinary string comparison, so ``uchar`` is expected
    to be a single character; an empty string yields False.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'
6
-
7
-
8
# Lookup table: toneless pinyin syllable -> (initial, final).
# "^" stands in for the initial of syllables that have none (including the
# ones spelled with a leading "w" or "y"). Finals use an expanded spelling:
# ui -> uei, un -> uen, iu -> iou, "v" for u-umlaut, "iii" for the vowel of
# zhi/chi/shi/ri and "ii" for the vowel of zi/ci/si.
pinyin_dict = {
    # --- no initial: a- ---
    "a": ("^", "a"), "ai": ("^", "ai"), "an": ("^", "an"), "ang": ("^", "ang"),
    "ao": ("^", "ao"),
    # --- b ---
    "ba": ("b", "a"), "bai": ("b", "ai"), "ban": ("b", "an"), "bang": ("b", "ang"),
    "bao": ("b", "ao"), "be": ("b", "e"), "bei": ("b", "ei"), "ben": ("b", "en"),
    "beng": ("b", "eng"), "bi": ("b", "i"), "bian": ("b", "ian"), "biao": ("b", "iao"),
    "bie": ("b", "ie"), "bin": ("b", "in"), "bing": ("b", "ing"), "bo": ("b", "o"),
    "bu": ("b", "u"),
    # --- c / ch ---
    "ca": ("c", "a"), "cai": ("c", "ai"), "can": ("c", "an"), "cang": ("c", "ang"),
    "cao": ("c", "ao"), "ce": ("c", "e"), "cen": ("c", "en"), "ceng": ("c", "eng"),
    "cha": ("ch", "a"), "chai": ("ch", "ai"), "chan": ("ch", "an"), "chang": ("ch", "ang"),
    "chao": ("ch", "ao"), "che": ("ch", "e"), "chen": ("ch", "en"), "cheng": ("ch", "eng"),
    "chi": ("ch", "iii"), "chong": ("ch", "ong"), "chou": ("ch", "ou"), "chu": ("ch", "u"),
    "chua": ("ch", "ua"), "chuai": ("ch", "uai"), "chuan": ("ch", "uan"),
    "chuang": ("ch", "uang"), "chui": ("ch", "uei"), "chun": ("ch", "uen"),
    "chuo": ("ch", "uo"),
    "ci": ("c", "ii"), "cong": ("c", "ong"), "cou": ("c", "ou"), "cu": ("c", "u"),
    "cuan": ("c", "uan"), "cui": ("c", "uei"), "cun": ("c", "uen"), "cuo": ("c", "uo"),
    # --- d ---
    "da": ("d", "a"), "dai": ("d", "ai"), "dan": ("d", "an"), "dang": ("d", "ang"),
    "dao": ("d", "ao"), "de": ("d", "e"), "dei": ("d", "ei"), "den": ("d", "en"),
    "deng": ("d", "eng"), "di": ("d", "i"), "dia": ("d", "ia"), "dian": ("d", "ian"),
    "diao": ("d", "iao"), "die": ("d", "ie"), "ding": ("d", "ing"), "diu": ("d", "iou"),
    "dong": ("d", "ong"), "dou": ("d", "ou"), "du": ("d", "u"), "duan": ("d", "uan"),
    "dui": ("d", "uei"), "dun": ("d", "uen"), "duo": ("d", "uo"),
    # --- no initial: e- (plus "ng") ---
    "e": ("^", "e"), "ei": ("^", "ei"), "en": ("^", "en"), "ng": ("^", "en"),
    "eng": ("^", "eng"), "er": ("^", "er"),
    # --- f ---
    "fa": ("f", "a"), "fan": ("f", "an"), "fang": ("f", "ang"), "fei": ("f", "ei"),
    "fen": ("f", "en"), "feng": ("f", "eng"), "fo": ("f", "o"), "fou": ("f", "ou"),
    "fu": ("f", "u"),
    # --- g ---
    "ga": ("g", "a"), "gai": ("g", "ai"), "gan": ("g", "an"), "gang": ("g", "ang"),
    "gao": ("g", "ao"), "ge": ("g", "e"), "gei": ("g", "ei"), "gen": ("g", "en"),
    "geng": ("g", "eng"), "gong": ("g", "ong"), "gou": ("g", "ou"), "gu": ("g", "u"),
    "gua": ("g", "ua"), "guai": ("g", "uai"), "guan": ("g", "uan"), "guang": ("g", "uang"),
    "gui": ("g", "uei"), "gun": ("g", "uen"), "guo": ("g", "uo"),
    # --- h ---
    "ha": ("h", "a"), "hai": ("h", "ai"), "han": ("h", "an"), "hang": ("h", "ang"),
    "hao": ("h", "ao"), "he": ("h", "e"), "hei": ("h", "ei"), "hen": ("h", "en"),
    "heng": ("h", "eng"), "hong": ("h", "ong"), "hou": ("h", "ou"), "hu": ("h", "u"),
    "hua": ("h", "ua"), "huai": ("h", "uai"), "huan": ("h", "uan"), "huang": ("h", "uang"),
    "hui": ("h", "uei"), "hun": ("h", "uen"), "huo": ("h", "uo"),
    # --- j ---
    "ji": ("j", "i"), "jia": ("j", "ia"), "jian": ("j", "ian"), "jiang": ("j", "iang"),
    "jiao": ("j", "iao"), "jie": ("j", "ie"), "jin": ("j", "in"), "jing": ("j", "ing"),
    "jiong": ("j", "iong"), "jiu": ("j", "iou"), "ju": ("j", "v"), "juan": ("j", "van"),
    "jue": ("j", "ve"), "jun": ("j", "vn"),
    # --- k ---
    "ka": ("k", "a"), "kai": ("k", "ai"), "kan": ("k", "an"), "kang": ("k", "ang"),
    "kao": ("k", "ao"), "ke": ("k", "e"), "kei": ("k", "ei"), "ken": ("k", "en"),
    "keng": ("k", "eng"), "kong": ("k", "ong"), "kou": ("k", "ou"), "ku": ("k", "u"),
    "kua": ("k", "ua"), "kuai": ("k", "uai"), "kuan": ("k", "uan"), "kuang": ("k", "uang"),
    "kui": ("k", "uei"), "kun": ("k", "uen"), "kuo": ("k", "uo"),
    # --- l ---
    "la": ("l", "a"), "lai": ("l", "ai"), "lan": ("l", "an"), "lang": ("l", "ang"),
    "lao": ("l", "ao"), "le": ("l", "e"), "lei": ("l", "ei"), "leng": ("l", "eng"),
    "li": ("l", "i"), "lia": ("l", "ia"), "lian": ("l", "ian"), "liang": ("l", "iang"),
    "liao": ("l", "iao"), "lie": ("l", "ie"), "lin": ("l", "in"), "ling": ("l", "ing"),
    "liu": ("l", "iou"), "lo": ("l", "o"), "long": ("l", "ong"), "lou": ("l", "ou"),
    "lu": ("l", "u"), "lv": ("l", "v"), "luan": ("l", "uan"), "lve": ("l", "ve"),
    "lue": ("l", "ve"), "lun": ("l", "uen"), "luo": ("l", "uo"),
    # --- m ---
    "ma": ("m", "a"), "mai": ("m", "ai"), "man": ("m", "an"), "mang": ("m", "ang"),
    "mao": ("m", "ao"), "me": ("m", "e"), "mei": ("m", "ei"), "men": ("m", "en"),
    "meng": ("m", "eng"), "mi": ("m", "i"), "mian": ("m", "ian"), "miao": ("m", "iao"),
    "mie": ("m", "ie"), "min": ("m", "in"), "ming": ("m", "ing"), "miu": ("m", "iou"),
    "mo": ("m", "o"), "mou": ("m", "ou"), "mu": ("m", "u"),
    # --- n ---
    "na": ("n", "a"), "nai": ("n", "ai"), "nan": ("n", "an"), "nang": ("n", "ang"),
    "nao": ("n", "ao"), "ne": ("n", "e"), "nei": ("n", "ei"), "nen": ("n", "en"),
    "neng": ("n", "eng"), "ni": ("n", "i"), "nia": ("n", "ia"), "nian": ("n", "ian"),
    "niang": ("n", "iang"), "niao": ("n", "iao"), "nie": ("n", "ie"), "nin": ("n", "in"),
    "ning": ("n", "ing"), "niu": ("n", "iou"), "nong": ("n", "ong"), "nou": ("n", "ou"),
    "nu": ("n", "u"), "nv": ("n", "v"), "nuan": ("n", "uan"), "nve": ("n", "ve"),
    "nue": ("n", "ve"), "nuo": ("n", "uo"),
    # --- no initial: o- ---
    "o": ("^", "o"), "ou": ("^", "ou"),
    # --- p ---
    "pa": ("p", "a"), "pai": ("p", "ai"), "pan": ("p", "an"), "pang": ("p", "ang"),
    "pao": ("p", "ao"), "pe": ("p", "e"), "pei": ("p", "ei"), "pen": ("p", "en"),
    "peng": ("p", "eng"), "pi": ("p", "i"), "pian": ("p", "ian"), "piao": ("p", "iao"),
    "pie": ("p", "ie"), "pin": ("p", "in"), "ping": ("p", "ing"), "po": ("p", "o"),
    "pou": ("p", "ou"), "pu": ("p", "u"),
    # --- q ---
    "qi": ("q", "i"), "qia": ("q", "ia"), "qian": ("q", "ian"), "qiang": ("q", "iang"),
    "qiao": ("q", "iao"), "qie": ("q", "ie"), "qin": ("q", "in"), "qing": ("q", "ing"),
    "qiong": ("q", "iong"), "qiu": ("q", "iou"), "qu": ("q", "v"), "quan": ("q", "van"),
    "que": ("q", "ve"), "qun": ("q", "vn"),
    # --- r ---
    "ran": ("r", "an"), "rang": ("r", "ang"), "rao": ("r", "ao"), "re": ("r", "e"),
    "ren": ("r", "en"), "reng": ("r", "eng"), "ri": ("r", "iii"), "rong": ("r", "ong"),
    "rou": ("r", "ou"), "ru": ("r", "u"), "rua": ("r", "ua"), "ruan": ("r", "uan"),
    "rui": ("r", "uei"), "run": ("r", "uen"), "ruo": ("r", "uo"),
    # --- s / sh ---
    "sa": ("s", "a"), "sai": ("s", "ai"), "san": ("s", "an"), "sang": ("s", "ang"),
    "sao": ("s", "ao"), "se": ("s", "e"), "sen": ("s", "en"), "seng": ("s", "eng"),
    "sha": ("sh", "a"), "shai": ("sh", "ai"), "shan": ("sh", "an"), "shang": ("sh", "ang"),
    "shao": ("sh", "ao"), "she": ("sh", "e"), "shei": ("sh", "ei"), "shen": ("sh", "en"),
    "sheng": ("sh", "eng"), "shi": ("sh", "iii"), "shou": ("sh", "ou"), "shu": ("sh", "u"),
    "shua": ("sh", "ua"), "shuai": ("sh", "uai"), "shuan": ("sh", "uan"),
    "shuang": ("sh", "uang"), "shui": ("sh", "uei"), "shun": ("sh", "uen"),
    "shuo": ("sh", "uo"),
    "si": ("s", "ii"), "song": ("s", "ong"), "sou": ("s", "ou"), "su": ("s", "u"),
    "suan": ("s", "uan"), "sui": ("s", "uei"), "sun": ("s", "uen"), "suo": ("s", "uo"),
    # --- t ---
    "ta": ("t", "a"), "tai": ("t", "ai"), "tan": ("t", "an"), "tang": ("t", "ang"),
    "tao": ("t", "ao"), "te": ("t", "e"), "tei": ("t", "ei"), "teng": ("t", "eng"),
    "ti": ("t", "i"), "tian": ("t", "ian"), "tiao": ("t", "iao"), "tie": ("t", "ie"),
    "ting": ("t", "ing"), "tong": ("t", "ong"), "tou": ("t", "ou"), "tu": ("t", "u"),
    "tuan": ("t", "uan"), "tui": ("t", "uei"), "tun": ("t", "uen"), "tuo": ("t", "uo"),
    # --- no initial: w- ---
    "wa": ("^", "ua"), "wai": ("^", "uai"), "wan": ("^", "uan"), "wang": ("^", "uang"),
    "wei": ("^", "uei"), "wen": ("^", "uen"), "weng": ("^", "ueng"), "wo": ("^", "uo"),
    "wu": ("^", "u"),
    # --- x ---
    "xi": ("x", "i"), "xia": ("x", "ia"), "xian": ("x", "ian"), "xiang": ("x", "iang"),
    "xiao": ("x", "iao"), "xie": ("x", "ie"), "xin": ("x", "in"), "xing": ("x", "ing"),
    "xiong": ("x", "iong"), "xiu": ("x", "iou"), "xu": ("x", "v"), "xuan": ("x", "van"),
    "xue": ("x", "ve"), "xun": ("x", "vn"),
    # --- no initial: y- ---
    "ya": ("^", "ia"), "yan": ("^", "ian"), "yang": ("^", "iang"), "yao": ("^", "iao"),
    "ye": ("^", "ie"), "yi": ("^", "i"), "yin": ("^", "in"), "ying": ("^", "ing"),
    "yo": ("^", "iou"), "yong": ("^", "iong"), "you": ("^", "iou"), "yu": ("^", "v"),
    "yuan": ("^", "van"), "yue": ("^", "ve"), "yun": ("^", "vn"),
    # --- z / zh ---
    "za": ("z", "a"), "zai": ("z", "ai"), "zan": ("z", "an"), "zang": ("z", "ang"),
    "zao": ("z", "ao"), "ze": ("z", "e"), "zei": ("z", "ei"), "zen": ("z", "en"),
    "zeng": ("z", "eng"),
    "zha": ("zh", "a"), "zhai": ("zh", "ai"), "zhan": ("zh", "an"), "zhang": ("zh", "ang"),
    "zhao": ("zh", "ao"), "zhe": ("zh", "e"), "zhei": ("zh", "ei"), "zhen": ("zh", "en"),
    "zheng": ("zh", "eng"), "zhi": ("zh", "iii"), "zhong": ("zh", "ong"),
    "zhou": ("zh", "ou"), "zhu": ("zh", "u"), "zhua": ("zh", "ua"), "zhuai": ("zh", "uai"),
    "zhuan": ("zh", "uan"), "zhuang": ("zh", "uang"), "zhui": ("zh", "uei"),
    "zhun": ("zh", "uen"), "zhuo": ("zh", "uo"),
    "zi": ("z", "ii"), "zong": ("z", "ong"), "zou": ("z", "ou"), "zu": ("z", "u"),
    "zuan": ("z", "uan"), "zui": ("z", "uei"), "zun": ("z", "uen"), "zuo": ("z", "uo"),
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bert/vocab.txt DELETED
The diff for this file is too large to render. See raw diff