Pendrokar committed on
Commit
fd48aa5
1 Parent(s): a485023

voice reorder & emojis; flag switch to nation with most speakers

Browse files
Files changed (1) hide show
  1. app.py +15 -13
app.py CHANGED
@@ -19,29 +19,31 @@ latest_commit_sha = commits[0].commit_id
19
  hf_cache_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_nvidia/snapshots/{latest_commit_sha}/'
20
  models_path = hf_cache_models_path
21
 
 
22
  voice_models = [
23
- ("Male #6671", "ccby_nvidia_hifi_6671_M"),
24
- ("Male #6670", "ccby_nvidia_hifi_6670_M"),
 
25
  ("Male #9017", "ccby_nvidia_hifi_9017_M"),
26
  ("Male #6097", "ccby_nvidia_hifi_6097_M"),
27
- ("Female #92", "ccby_nvidia_hifi_92_F"),
28
- ("Female #11697", "ccby_nvidia_hifi_11697_F"),
29
- ("Female #12787", "ccby_nvidia_hifi_12787_F"),
30
- ("Female #11614", "ccby_nv_hifi_11614_F"),
31
  ("Female #8051", "ccby_nvidia_hifi_8051_F"),
 
32
  ("Female #9136", "ccby_nvidia_hifi_9136_F"),
33
  ]
 
34
  current_voice_model = None
35
  base_speaker_emb = ''
36
 
37
  # order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
38
  languages = [
39
- ("🇬🇧 EN", "en"),
40
  ("🇩🇪 DE", "de"),
41
  ("🇪🇸 ES", "es"),
42
  ("🇮🇹 IT", "it"),
43
  ("🇳🇱 NL", "nl"),
44
- ("🇵🇹 PT", "pt"),
45
  ("🇵🇱 PL", "pl"),
46
  ("🇷🇴 RO", "ro"),
47
  ("🇸🇪 SV", "sv"),
@@ -51,7 +53,7 @@ languages = [
51
  ("🇬🇷 EL", "el"),
52
  ("🇫🇷 FR", "fr"),
53
  ("🇷🇺 RU", "ru"),
54
- ("🇺🇦 UK", "uk"),
55
  ("🇹🇷 TR", "tr"),
56
  ("🇸🇦 AR", "ar"),
57
  ("🇮🇳 HI", "hi"),
@@ -60,10 +62,10 @@ languages = [
60
  ("🇨🇳 ZH", "zh"),
61
  ("🇻🇳 VI", "vi"),
62
  ("🇻🇦 LA", "la"),
63
- ("HA", "ha"),
64
- ("SW", "sw"),
65
  ("🇳🇬 YO", "yo"),
66
- ("WO", "wo"),
 
 
67
  ]
68
 
69
  # Translated from English by DeepMind's Gemini Pro
@@ -89,7 +91,7 @@ default_text = {
89
  "ro": "Așa sună vocea mea.",
90
  "ru": "Вот как звучит мой голос.",
91
  "sv": "Såhär låter min röst.",
92
- "sw": "Sauti yangu inasikika hivi.",
93
  "tr": "Benim sesimin sesi böyle.",
94
  "uk": "Ось як звучить мій голос.",
95
  "vi": "Đây là giọng nói của tôi.",
 
19
  hf_cache_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_nvidia/snapshots/{latest_commit_sha}/'
20
  models_path = hf_cache_models_path
21
 
22
+ # ordered from most emotional and respects pauses to ones that do the least
23
  voice_models = [
24
+ ("👨‍🦳 #6671", "ccby_nvidia_hifi_6671_M"),
25
+ ("👱‍♀️ 🇬🇧 #92", "ccby_nvidia_hifi_92_F"),
26
+ ("🧔 #6670", "ccby_nvidia_hifi_6670_M"),
27
  ("Male #9017", "ccby_nvidia_hifi_9017_M"),
28
  ("Male #6097", "ccby_nvidia_hifi_6097_M"),
29
+ ("👩‍🦱 #12787", "ccby_nvidia_hifi_12787_F"),
30
+ ("👵 #11614", "ccby_nv_hifi_11614_F"),
 
 
31
  ("Female #8051", "ccby_nvidia_hifi_8051_F"),
32
+ ("👩‍🦳 #11697", "ccby_nvidia_hifi_11697_F"),
33
  ("Female #9136", "ccby_nvidia_hifi_9136_F"),
34
  ]
35
+
36
  current_voice_model = None
37
  base_speaker_emb = ''
38
 
39
  # order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
40
  languages = [
41
+ ("🇺🇸 EN", "en"),
42
  ("🇩🇪 DE", "de"),
43
  ("🇪🇸 ES", "es"),
44
  ("🇮🇹 IT", "it"),
45
  ("🇳🇱 NL", "nl"),
46
+ ("🇧🇷 PT", "pt"),
47
  ("🇵🇱 PL", "pl"),
48
  ("🇷🇴 RO", "ro"),
49
  ("🇸🇪 SV", "sv"),
 
53
  ("🇬🇷 EL", "el"),
54
  ("🇫🇷 FR", "fr"),
55
  ("🇷🇺 RU", "ru"),
56
+ ("🇺🇦 UA", "uk"),
57
  ("🇹🇷 TR", "tr"),
58
  ("🇸🇦 AR", "ar"),
59
  ("🇮🇳 HI", "hi"),
 
62
  ("🇨🇳 ZH", "zh"),
63
  ("🇻🇳 VI", "vi"),
64
  ("🇻🇦 LA", "la"),
 
 
65
  ("🇳🇬 YO", "yo"),
66
+ ("Swahili", "sw"),
67
+ ("Hausa", "ha"),
68
+ ("Wolof", "wo"),
69
  ]
70
 
71
  # Translated from English by DeepMind's Gemini Pro
 
91
  "ro": "Așa sună vocea mea.",
92
  "ru": "Вот как звучит мой голос.",
93
  "sv": "Såhär låter min röst.",
94
+ "sw": "Baba, yetu, yetu, uliye. Mbinguni, yetu, yetu. Amiiinaa!!", #civ4
95
  "tr": "Benim sesimin sesi böyle.",
96
  "uk": "Ось як звучить мій голос.",
97
  "vi": "Đây là giọng nói của tôi.",