gorkemgoknar committed on
Commit
48423a0
1 Parent(s): 23f5658

will upload non-recoverable errors so we can debug

Browse files
Files changed (1) hide show
  1. app.py +58 -4
app.py CHANGED
@@ -13,6 +13,10 @@ os.environ["COQUI_TOS_AGREED"] = "1"
13
  # langid is used to detect language for longer text
14
  # Most users expect text to be their own language, there is checkbox to disable it
15
  import langid
 
 
 
 
16
 
17
  import gradio as gr
18
  from scipy.io.wavfile import write
@@ -68,6 +72,8 @@ DEVICE_ASSERT_DETECTED=0
68
  DEVICE_ASSERT_PROMPT=None
69
  DEVICE_ASSERT_LANG=None
70
 
 
 
71
  #supported_languages=["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn"]
72
  supported_languages=config.languages
73
 
@@ -189,7 +195,19 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
189
  t_latent=time.time()
190
 
191
  # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
192
- gpt_cond_latent, diffusion_conditioning, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
 
 
 
 
 
 
 
 
 
 
 
 
193
  latent_calculation_time = time.time() - t_latent
194
  #metrics_text=f"Embedding calculation time: {latent_calculation_time:.2f} seconds\n"
195
 
@@ -212,7 +230,6 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
212
  print(f"Real-time factor (RTF): {real_time_factor}")
213
  metrics_text+=f"Real-time factor (RTF): {real_time_factor:.2f}\n"
214
  torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
215
-
216
  except RuntimeError as e :
217
  if "device-side assert" in str(e):
218
  # cannot do anything on cuda device side error, need to restart
@@ -223,13 +240,50 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
223
  DEVICE_ASSERT_DETECTED=1
224
  DEVICE_ASSERT_PROMPT=prompt
225
  DEVICE_ASSERT_LANG=language
226
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
 
 
 
 
 
 
 
 
 
 
 
228
  # HF Space specific.. This error is unrecoverable need to restart space
229
  api.restart_space(repo_id=repo_id)
230
  else:
231
  print("RuntimeError: non device-side assert error:", str(e))
232
- raise e
 
 
 
 
 
 
233
  return (
234
  gr.make_waveform(
235
  audio="output.wav",
 
13
  # langid is used to detect language for longer text
14
  # Most users expect text to be their own language, there is checkbox to disable it
15
  import langid
16
+ import base64
17
+ import csv
18
+ from io import StringIO
19
+ import datetime
20
 
21
  import gradio as gr
22
  from scipy.io.wavfile import write
 
72
  DEVICE_ASSERT_PROMPT=None
73
  DEVICE_ASSERT_LANG=None
74
 
75
+
76
+
77
  #supported_languages=["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn"]
78
  supported_languages=config.languages
79
 
 
195
  t_latent=time.time()
196
 
197
  # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
198
+ try:
199
+ gpt_cond_latent, diffusion_conditioning, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
200
+ except Exception as e:
201
+ if "Failed to decode" in str(e):
202
+ print("Speaker encoding error", str(e))
203
+ gr.Warning("It appears something wrong with reference, did you unmute your microphone?")
204
+ return (
205
+ None,
206
+ None,
207
+ None,
208
+ None,
209
+ )
210
+
211
  latent_calculation_time = time.time() - t_latent
212
  #metrics_text=f"Embedding calculation time: {latent_calculation_time:.2f} seconds\n"
213
 
 
230
  print(f"Real-time factor (RTF): {real_time_factor}")
231
  metrics_text+=f"Real-time factor (RTF): {real_time_factor:.2f}\n"
232
  torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
 
233
  except RuntimeError as e :
234
  if "device-side assert" in str(e):
235
  # cannot do anything on cuda device side error, need to restart
 
240
  DEVICE_ASSERT_DETECTED=1
241
  DEVICE_ASSERT_PROMPT=prompt
242
  DEVICE_ASSERT_LANG=language
243
+
244
+ # just before restarting save what caused the issue so we can handle it in future
245
+ # Uploading error data only happens for unrecoverable errors
246
+ error_time = datetime.datetime.now().strftime('%d-%m-%Y-%H:%M:%S')
247
+ error_data = [error_time, prompt, language, audio_file_pth, mic_file_path, use_mic, voice_cleanup, no_lang_auto_detect, agree]
248
+ error_data = [str(e) if type(e)!=str else e for e in error_data]
249
+ print(error_data)
250
+ print(speaker_wav)
251
+ write_io = StringIO()
252
+ csv.writer(write_io).writerows(error_data)
253
+ csv_upload= write_io.getvalue().encode()
254
+
255
+ filename = error_time+"_" + str(uuid.uuid4()) +".csv"
256
+ print("Writing error csv")
257
+ error_api = HfApi()
258
+ error_api.upload_file(
259
+ path_or_fileobj=csv_upload,
260
+ path_in_repo=filename,
261
+ repo_id="coqui/xtts-flagged-dataset",
262
+ repo_type="dataset",
263
+ )
264
 
265
+ #speaker_wav
266
+ print("Writing error reference audio")
267
+ speaker_filename = error_time+"_reference_"+ str(uuid.uuid4()) +".wav"
268
+ error_api = HfApi()
269
+ error_api.upload_file(
270
+ path_or_fileobj=speaker_wav,
271
+ path_in_repo=speaker_filename,
272
+ repo_id="coqui/xtts-flagged-dataset",
273
+ repo_type="dataset",
274
+ )
275
+
276
  # HF Space specific.. This error is unrecoverable need to restart space
277
  api.restart_space(repo_id=repo_id)
278
  else:
279
  print("RuntimeError: non device-side assert error:", str(e))
280
+ gr.Warning("Something unexpected happened please retry again.")
281
+ return (
282
+ None,
283
+ None,
284
+ None,
285
+ None,
286
+ )
287
  return (
288
  gr.make_waveform(
289
  audio="output.wav",