seanghay committed on
Commit
b45f31e
•
1 Parent(s): 9d0a723
Files changed (7) hide show
  1. .gitignore +26 -0
  2. khm/G_100000.pth +3 -0
  3. khm/config.json +87 -0
  4. khm/vocab.txt +74 -0
  5. main.py +44 -0
  6. packages.txt +2 -0
  7. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # General
2
+ .DS_Store
3
+ .AppleDouble
4
+ .LSOverride
5
+
6
+ # Icon must end with two \r
7
+ Icon
8
+
9
+ # Thumbnails
10
+ ._*
11
+
12
+ # Files that might appear in the root of a volume
13
+ .DocumentRevisions-V100
14
+ .fseventsd
15
+ .Spotlight-V100
16
+ .TemporaryItems
17
+ .Trashes
18
+ .VolumeIcon.icns
19
+ .com.apple.timemachine.donotpresent
20
+
21
+ # Directories potentially created on remote AFP share
22
+ .AppleDB
23
+ .AppleDesktop
24
+ Network Trash Folder
25
+ Temporary Items
26
+ .apdisk
khm/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a7237c6bd72beb885152631e69743290c03b4a33ea17b7c3ca851a29b9749d
3
+ size 436464113
khm/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
khm/vocab.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ แŸ’
2
+ แž™
3
+ แžข
4
+ แž†
5
+ แž‚
6
+ แž„
7
+ แžพ
8
+ แž’
9
+ แŸ‹
10
+ แžƒ
11
+ แžŒ
12
+ แžฅ
13
+ แž”
14
+ แžฝ
15
+ แžˆ
16
+ q
17
+ แŸ
18
+ แž•
19
+ แžท
20
+
21
+ แž‡
22
+ แžง
23
+ แž›
24
+ แžฑ
25
+ แž‘
26
+ แžผ
27
+ แžš
28
+ แŸŽ
29
+ แžญ
30
+ แžถ
31
+ แŸ€
32
+ แžฟ
33
+ แŸ
34
+ แŸ†
35
+ แŸ
36
+ แž
37
+ _
38
+ แŸ‰
39
+ แž€
40
+ แŸ‚
41
+ แŸ…
42
+ แŸƒ
43
+ แžŠ
44
+ แžœ
45
+ แŸˆ
46
+ แžฌ
47
+ แžซ
48
+ แŸ‡
49
+ แŸ„
50
+ แžธ
51
+ แžป
52
+ แžฏ
53
+ แž–
54
+ แžก
55
+ แž…
56
+ แž‹
57
+ แžŽ
58
+ 1
59
+ แž 
60
+ แž“
61
+ แžน
62
+ แžŸ
63
+ แž
64
+ แŸ
65
+ แž‰
66
+ แŸŠ
67
+ แžฎ
68
+ แŸŒ
69
+ แž
70
+ แžบ
71
+ แžช
72
+ -
73
+ แž˜
74
+ แž—
main.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import re
3
+ from ttsmms import TTS
4
+ import gradio as gr
5
+
6
+ tts = TTS("khm")
7
+
8
+ def sanitize(text):
9
+ return re.sub(r"\u200b", "", text)
10
+
11
+ def generate_voice(text):
12
+ audio = tts.synthesis(text)
13
+ return (audio['sampling_rate'], audio['x'])
14
+
15
+ with gr.Blocks(title="Khmer Text to Speech with MMS") as blocks:
16
+
17
+ gr.Markdown('# Khmer Text to Speech - MMS')
18
+ gr.Markdown('MMS: Scaling Speech Technology to 1000+ languages by Meta AI')
19
+
20
+ input_text = gr.Textbox(label="អក្សរ", lines=3)
21
+ examples = gr.Examples(examples=["នេះ​ជា​រូបថត​ជនសង្ស័យ​ដែល​បេីកឡាន។"], inputs=[input_text])
22
+
23
+ run_button = gr.Button(
24
+ text="Generate",
25
+ type="button",
26
+ )
27
+
28
+ out_audio = gr.Audio(
29
+ label="សំឡេងដែលបានបង្កើត",
30
+ type="numpy",
31
+ )
32
+
33
+ inputs = [input_text]
34
+ outputs = [out_audio]
35
+
36
+ run_button.click(
37
+ fn=generate_voice,
38
+ inputs=inputs,
39
+ outputs=outputs,
40
+ queue=True,
41
+ )
42
+
43
+
44
+ blocks.queue(concurrency_count=1).launch(debug=True)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ libsndfile1
2
+ espeak-ng
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ttsmms==0.6
2
+ gradio==3.32.0