Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
voice reorder & emojis; flag switch to nation with most speakers
Browse files
app.py
CHANGED
@@ -19,29 +19,31 @@ latest_commit_sha = commits[0].commit_id
|
|
19 |
hf_cache_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_nvidia/snapshots/{latest_commit_sha}/'
|
20 |
models_path = hf_cache_models_path
|
21 |
|
|
|
22 |
voice_models = [
|
23 |
-
("
|
24 |
-
("
|
|
|
25 |
("Male #9017", "ccby_nvidia_hifi_9017_M"),
|
26 |
("Male #6097", "ccby_nvidia_hifi_6097_M"),
|
27 |
-
("
|
28 |
-
("
|
29 |
-
("Female #12787", "ccby_nvidia_hifi_12787_F"),
|
30 |
-
("Female #11614", "ccby_nv_hifi_11614_F"),
|
31 |
("Female #8051", "ccby_nvidia_hifi_8051_F"),
|
|
|
32 |
("Female #9136", "ccby_nvidia_hifi_9136_F"),
|
33 |
]
|
|
|
34 |
current_voice_model = None
|
35 |
base_speaker_emb = ''
|
36 |
|
37 |
# order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
|
38 |
languages = [
|
39 |
-
("
|
40 |
("🇩🇪 DE", "de"),
|
41 |
("🇪🇸 ES", "es"),
|
42 |
("🇮🇹 IT", "it"),
|
43 |
("🇳🇱 NL", "nl"),
|
44 |
-
("
|
45 |
("🇵🇱 PL", "pl"),
|
46 |
("🇷🇴 RO", "ro"),
|
47 |
("🇸🇪 SV", "sv"),
|
@@ -51,7 +53,7 @@ languages = [
|
|
51 |
("🇬🇷 EL", "el"),
|
52 |
("🇫🇷 FR", "fr"),
|
53 |
("🇷🇺 RU", "ru"),
|
54 |
-
("🇺🇦
|
55 |
("🇹🇷 TR", "tr"),
|
56 |
("🇸🇦 AR", "ar"),
|
57 |
("🇮🇳 HI", "hi"),
|
@@ -60,10 +62,10 @@ languages = [
|
|
60 |
("🇨🇳 ZH", "zh"),
|
61 |
("🇻🇳 VI", "vi"),
|
62 |
("🇻🇦 LA", "la"),
|
63 |
-
("HA", "ha"),
|
64 |
-
("SW", "sw"),
|
65 |
("🇳🇬 YO", "yo"),
|
66 |
-
("
|
|
|
|
|
67 |
]
|
68 |
|
69 |
# Translated from English by DeepMind's Gemini Pro
|
@@ -89,7 +91,7 @@ default_text = {
|
|
89 |
"ro": "Așa sună vocea mea.",
|
90 |
"ru": "Вот как звучит мой голос.",
|
91 |
"sv": "Såhär låter min röst.",
|
92 |
-
"sw": "
|
93 |
"tr": "Benim sesimin sesi böyle.",
|
94 |
"uk": "Ось як звучить мій голос.",
|
95 |
"vi": "Đây là giọng nói của tôi.",
|
|
|
19 |
hf_cache_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_nvidia/snapshots/{latest_commit_sha}/'
|
20 |
models_path = hf_cache_models_path
|
21 |
|
22 |
+
# ordered from the voices that are most emotional and best respect pauses to the ones that do so the least
|
23 |
voice_models = [
|
24 |
+
("👨‍🦳 #6671", "ccby_nvidia_hifi_6671_M"),
|
25 |
+
("👱‍♀️ 🇬🇧 #92", "ccby_nvidia_hifi_92_F"),
|
26 |
+
("🧔 #6670", "ccby_nvidia_hifi_6670_M"),
|
27 |
("Male #9017", "ccby_nvidia_hifi_9017_M"),
|
28 |
("Male #6097", "ccby_nvidia_hifi_6097_M"),
|
29 |
+
("👩‍🦱 #12787", "ccby_nvidia_hifi_12787_F"),
|
30 |
+
("👵 #11614", "ccby_nv_hifi_11614_F"),
|
|
|
|
|
31 |
("Female #8051", "ccby_nvidia_hifi_8051_F"),
|
32 |
+
("👩‍🦳 #11697", "ccby_nvidia_hifi_11697_F"),
|
33 |
("Female #9136", "ccby_nvidia_hifi_9136_F"),
|
34 |
]
|
35 |
+
|
36 |
current_voice_model = None
|
37 |
base_speaker_emb = ''
|
38 |
|
39 |
# order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
|
40 |
languages = [
|
41 |
+
("🇺🇸 EN", "en"),
|
42 |
("🇩🇪 DE", "de"),
|
43 |
("🇪🇸 ES", "es"),
|
44 |
("🇮🇹 IT", "it"),
|
45 |
("🇳🇱 NL", "nl"),
|
46 |
+
("🇧🇷 PT", "pt"),
|
47 |
("🇵🇱 PL", "pl"),
|
48 |
("🇷🇴 RO", "ro"),
|
49 |
("🇸🇪 SV", "sv"),
|
|
|
53 |
("🇬🇷 EL", "el"),
|
54 |
("🇫🇷 FR", "fr"),
|
55 |
("🇷🇺 RU", "ru"),
|
56 |
+
("🇺🇦 UA", "uk"),
|
57 |
("🇹🇷 TR", "tr"),
|
58 |
("🇸🇦 AR", "ar"),
|
59 |
("🇮🇳 HI", "hi"),
|
|
|
62 |
("🇨🇳 ZH", "zh"),
|
63 |
("🇻🇳 VI", "vi"),
|
64 |
("🇻🇦 LA", "la"),
|
|
|
|
|
65 |
("🇳🇬 YO", "yo"),
|
66 |
+
("Swahili", "sw"),
|
67 |
+
("Hausa", "ha"),
|
68 |
+
("Wolof", "wo"),
|
69 |
]
|
70 |
|
71 |
# Translated from English by DeepMind's Gemini Pro
|
|
|
91 |
"ro": "Așa sună vocea mea.",
|
92 |
"ru": "Вот как звучит мой голос.",
|
93 |
"sv": "Såhär låter min röst.",
|
94 |
+
"sw": "Baba, yetu, yetu, uliye. Mbinguni, yetu, yetu. Amiiinaa!!", #civ4
|
95 |
"tr": "Benim sesimin sesi böyle.",
|
96 |
"uk": "Ось як звучить мій голос.",
|
97 |
"vi": "Đây là giọng nói của tôi.",
|