mrfakename committed on
Commit
4064aae
1 Parent(s): 14d6715

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (3) hide show
  1. app.py +2 -3
  2. inference-cli.py +2 -3
  3. model/utils.py +2 -3
app.py CHANGED
@@ -158,9 +158,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
158
 
159
  # Calculate duration
160
  ref_audio_len = audio.shape[-1] // hop_length
161
- zh_pause_punc = r"。,、;:?!"
162
- ref_text_len = len(ref_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, ref_text))
163
- gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
164
  duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
165
 
166
  # inference
 
158
 
159
  # Calculate duration
160
  ref_audio_len = audio.shape[-1] // hop_length
161
+ ref_text_len = len(ref_text.encode('utf-8'))
162
+ gen_text_len = len(gen_text.encode('utf-8'))
 
163
  duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
164
 
165
  # inference
inference-cli.py CHANGED
@@ -250,9 +250,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, model,ckpt_file,file_voca
250
 
251
  # Calculate duration
252
  ref_audio_len = audio.shape[-1] // hop_length
253
- zh_pause_punc = r"。,、;:?!"
254
- ref_text_len = len(ref_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, ref_text))
255
- gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
256
  duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
257
 
258
  # inference
 
250
 
251
  # Calculate duration
252
  ref_audio_len = audio.shape[-1] // hop_length
253
+ ref_text_len = len(ref_text.encode('utf-8'))
254
+ gen_text_len = len(gen_text.encode('utf-8'))
 
255
  duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
256
 
257
  # inference
model/utils.py CHANGED
@@ -296,9 +296,8 @@ def get_inference_prompt(
296
  # # test vocoder resynthesis
297
  # ref_audio = gt_audio
298
  else:
299
- zh_pause_punc = r"。,、;:?!"
300
- ref_text_len = len(prompt_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, prompt_text))
301
- gen_text_len = len(gt_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gt_text))
302
  total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)
303
 
304
  # to mel spectrogram
 
296
  # # test vocoder resynthesis
297
  # ref_audio = gt_audio
298
  else:
299
+ ref_text_len = len(prompt_text.encode('utf-8'))
300
+ gen_text_len = len(gt_text.encode('utf-8'))
 
301
  total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)
302
 
303
  # to mel spectrogram