mrfakename committed on
Commit
675a486
·
1 Parent(s): d430de8

Switch phonemizer

Browse files
Files changed (2) hide show
  1. ljspeechimportable.py +5 -6
  2. styletts2importable.py +8 -8
ljspeechimportable.py CHANGED
@@ -1,5 +1,4 @@
1
  from cached_path import cached_path
2
- from dp.phonemizer import Phonemizer
3
 
4
 
5
  import torch
@@ -68,10 +67,10 @@ def compute_style(ref_dicts):
68
  return reference_embeddings
69
 
70
  # load phonemizer
71
- # import phonemizer
72
- # global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')
73
 
74
- phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
75
 
76
 
77
  config = yaml.safe_load(open(str(cached_path('hf://yl4579/StyleTTS2-LJSpeech/Models/LJSpeech/config.yml'))))
@@ -128,7 +127,7 @@ sampler = DiffusionSampler(
128
  def inference(text, noise, diffusion_steps=5, embedding_scale=1):
129
  text = text.strip()
130
  text = text.replace('"', '')
131
- ps = phonemizer([text], lang='en_us')
132
  ps = word_tokenize(ps[0])
133
  ps = ' '.join(ps)
134
 
@@ -177,7 +176,7 @@ def inference(text, noise, diffusion_steps=5, embedding_scale=1):
177
  def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):
178
  text = text.strip()
179
  text = text.replace('"', '')
180
- ps = phonemizer([text], lang='en_us')
181
  ps = word_tokenize(ps[0])
182
  ps = ' '.join(ps)
183
 
 
1
  from cached_path import cached_path
 
2
 
3
 
4
  import torch
 
67
  return reference_embeddings
68
 
69
  # load phonemizer
70
+ import phonemizer
71
+ global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')
72
 
73
+ # phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
74
 
75
 
76
  config = yaml.safe_load(open(str(cached_path('hf://yl4579/StyleTTS2-LJSpeech/Models/LJSpeech/config.yml'))))
 
127
  def inference(text, noise, diffusion_steps=5, embedding_scale=1):
128
  text = text.strip()
129
  text = text.replace('"', '')
130
+ ps = global_phonemizer.phonemize([text])
131
  ps = word_tokenize(ps[0])
132
  ps = ' '.join(ps)
133
 
 
176
  def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):
177
  text = text.strip()
178
  text = text.replace('"', '')
179
+ ps = global_phonemizer.phonemize([text])
180
  ps = word_tokenize(ps[0])
181
  ps = ' '.join(ps)
182
 
styletts2importable.py CHANGED
@@ -1,6 +1,6 @@
1
  from cached_path import cached_path
2
 
3
- from dp.phonemizer import Phonemizer
4
  print("NLTK")
5
  import nltk
6
  nltk.download('punkt')
@@ -73,9 +73,9 @@ elif torch.backends.mps.is_available():
73
  print("MPS would be available but cannot be used rn")
74
  # device = 'mps'
75
 
76
-
77
- # global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
78
- phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
79
 
80
 
81
  # config = yaml.safe_load(open("Models/LibriTTS/config.yml"))
@@ -133,7 +133,7 @@ sampler = DiffusionSampler(
133
 
134
  def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
135
  text = text.strip()
136
- ps = phonemizer([text], lang='en_us')
137
  ps = word_tokenize(ps[0])
138
  ps = ' '.join(ps)
139
  tokens = textclenaer(ps)
@@ -202,7 +202,7 @@ def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding
202
 
203
  def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1):
204
  text = text.strip()
205
- ps = phonemizer([text], lang='en_us')
206
  ps = word_tokenize(ps[0])
207
  ps = ' '.join(ps)
208
  ps = ps.replace('``', '"')
@@ -279,7 +279,7 @@ def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion
279
 
280
  def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
281
  text = text.strip()
282
- ps = phonemizer([text], lang='en_us')
283
  ps = word_tokenize(ps[0])
284
  ps = ' '.join(ps)
285
 
@@ -288,7 +288,7 @@ def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=
288
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
289
 
290
  ref_text = ref_text.strip()
291
- ps = phonemizer([ref_text], lang='en_us')
292
  ps = word_tokenize(ps[0])
293
  ps = ' '.join(ps)
294
 
 
1
  from cached_path import cached_path
2
 
3
+ # from dp.phonemizer import Phonemizer
4
  print("NLTK")
5
  import nltk
6
  nltk.download('punkt')
 
73
  print("MPS would be available but cannot be used rn")
74
  # device = 'mps'
75
 
76
+ import phonemizer
77
+ global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
78
+ # phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
79
 
80
 
81
  # config = yaml.safe_load(open("Models/LibriTTS/config.yml"))
 
133
 
134
  def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
135
  text = text.strip()
136
+ ps = global_phonemizer.phonemize([text])
137
  ps = word_tokenize(ps[0])
138
  ps = ' '.join(ps)
139
  tokens = textclenaer(ps)
 
202
 
203
  def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1):
204
  text = text.strip()
205
+ ps = global_phonemizer.phonemize([text])
206
  ps = word_tokenize(ps[0])
207
  ps = ' '.join(ps)
208
  ps = ps.replace('``', '"')
 
279
 
280
  def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
281
  text = text.strip()
282
+ ps = global_phonemizer.phonemize([text])
283
  ps = word_tokenize(ps[0])
284
  ps = ' '.join(ps)
285
 
 
288
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
289
 
290
  ref_text = ref_text.strip()
291
+ ps = global_phonemizer.phonemize([ref_text])
292
  ps = word_tokenize(ps[0])
293
  ps = ' '.join(ps)
294