Vladimir Alabov committed on
Commit
e227a3a
1 Parent(s): 365ec8b
Files changed (1) hide show
  1. app.py +19 -21
app.py CHANGED
@@ -24,27 +24,25 @@ def audio_postprocess(self, y):
24
  return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
25
 
26
  gr.Audio.postprocess = audio_postprocess
27
- def create_vc_fn(model, sid):
28
- def vc_fn(input_audio, vc_transform, auto_f0):
29
- if input_audio is None:
30
- return "You need to upload an audio", None
31
- sampling_rate, audio = input_audio
32
- duration = audio.shape[0] / sampling_rate
33
- if duration > 20 and limitation:
34
- return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
35
- audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
36
- if len(audio.shape) > 1:
37
- audio = librosa.to_mono(audio.transpose(1, 0))
38
- if sampling_rate != 16000:
39
- audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
40
- raw_path = io.BytesIO()
41
- soundfile.write(raw_path, audio, 16000, format="wav")
42
- raw_path.seek(0)
43
- out_audio, out_sr = model.infer(sid, vc_transform, raw_path,
44
- auto_predict_f0=auto_f0,
45
- )
46
- return "Success", (44100, out_audio.cpu().numpy())
47
- return vc_fn
48
 
49
  def get_speakers():
50
  speakers = []
 
24
  return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
25
 
26
  gr.Audio.postprocess = audio_postprocess
27
+ def vc_fn(input_audio, vc_transform, auto_f0):
28
+ if input_audio is None:
29
+ return "You need to upload an audio", None
30
+ sampling_rate, audio = input_audio
31
+ duration = audio.shape[0] / sampling_rate
32
+ if duration > 20 and limitation:
33
+ return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
34
+ audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
35
+ if len(audio.shape) > 1:
36
+ audio = librosa.to_mono(audio.transpose(1, 0))
37
+ if sampling_rate != 16000:
38
+ audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
39
+ raw_path = io.BytesIO()
40
+ soundfile.write(raw_path, audio, 16000, format="wav")
41
+ raw_path.seek(0)
42
+ out_audio, out_sr = model.infer(sid, vc_transform, raw_path,
43
+ auto_predict_f0=auto_f0,
44
+ )
45
+ return "Success", (44100, out_audio.cpu().numpy())
 
 
46
 
47
  def get_speakers():
48
  speakers = []