Spaces:

GitMylo
/

bark-voice-cloning

Running

Mylo committed on May 23, 2023

Commit

c1a6347

•

1 Parent(s): 162810d

Maybe squeeze?

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os.path
 import uuid
@@ -28,6 +29,8 @@ def clone(audio, *args):
     wav = wav[-int(sr*20):]  # Take only the last 20 seconds
     wav = wav.reshape(1, -1)  # Reshape from gradio style to HuBERT shape. (N, 1) to (1, N)
     wav = torch.tensor(wav, dtype=torch.float32)
@@ -42,7 +45,7 @@ def clone(audio, *args):
     with torch.no_grad():
         encoded_frames = encodec_model.encode(wav)
-    codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1)  # [B, n_q, T]
     if not os.path.isdir('data/speakers'):
         os.makedirs('data/speakers')

+import math
 import os.path
 import uuid
     wav = wav[-int(sr*20):]  # Take only the last 20 seconds
+    duration = wav.shape[0]
     wav = wav.reshape(1, -1)  # Reshape from gradio style to HuBERT shape. (N, 1) to (1, N)
     wav = torch.tensor(wav, dtype=torch.float32)
     with torch.no_grad():
         encoded_frames = encodec_model.encode(wav)
+    codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1).squeeze()  # [B, n_q, T]
     if not os.path.isdir('data/speakers'):
         os.makedirs('data/speakers')