Spaces:
Running
Running
Mylo
committed on
Commit
•
c1a6347
1
Parent(s):
162810d
Maybe squeeze?
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import os.path
|
2 |
import uuid
|
3 |
|
@@ -28,6 +29,8 @@ def clone(audio, *args):
|
|
28 |
|
29 |
wav = wav[-int(sr*20):] # Take only the last 20 seconds
|
30 |
|
|
|
|
|
31 |
wav = wav.reshape(1, -1) # Reshape from gradio style to HuBERT shape. (N, 1) to (1, N)
|
32 |
|
33 |
wav = torch.tensor(wav, dtype=torch.float32)
|
@@ -42,7 +45,7 @@ def clone(audio, *args):
|
|
42 |
with torch.no_grad():
|
43 |
encoded_frames = encodec_model.encode(wav)
|
44 |
|
45 |
-
codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1) # [B, n_q, T]
|
46 |
|
47 |
if not os.path.isdir('data/speakers'):
|
48 |
os.makedirs('data/speakers')
|
|
|
1 |
+
import math
|
2 |
import os.path
|
3 |
import uuid
|
4 |
|
|
|
29 |
|
30 |
wav = wav[-int(sr*20):] # Take only the last 20 seconds
|
31 |
|
32 |
+
duration = wav.shape[0]
|
33 |
+
|
34 |
wav = wav.reshape(1, -1) # Reshape from gradio style to HuBERT shape. (N, 1) to (1, N)
|
35 |
|
36 |
wav = torch.tensor(wav, dtype=torch.float32)
|
|
|
45 |
with torch.no_grad():
|
46 |
encoded_frames = encodec_model.encode(wav)
|
47 |
|
48 |
+
codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1).squeeze() # [B, n_q, T]
|
49 |
|
50 |
if not os.path.isdir('data/speakers'):
|
51 |
os.makedirs('data/speakers')
|