ok
Browse files
- api.py +2 -1
- results/.gitattributes +5 -0
api.py
CHANGED
@@ -31,6 +31,7 @@ def download_models(specific_models=None):
|
|
31 |
"""
|
32 |
MODELS = {
|
33 |
'autoregressive.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/autoregressive.pth',
|
|
|
34 |
'clvp.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/clvp.pth',
|
35 |
'cvvp.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/cvvp.pth',
|
36 |
'diffusion_decoder.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/diffusion_decoder.pth',
|
@@ -153,7 +154,7 @@ def classify_audio_clip(clip):
|
|
153 |
:param clip: torch tensor containing audio waveform data (get it from load_audio)
|
154 |
:return: True if the clip was classified as coming from Tortoise and false if it was classified as real.
|
155 |
"""
|
156 |
-
download_models(['classifier'])
|
157 |
classifier = AudioMiniEncoderWithClassifierHead(2, spec_dim=1, embedding_dim=512, depth=5, downsample_factor=4,
|
158 |
resnet_blocks=2, attn_blocks=4, num_attn_heads=4, base_channels=32,
|
159 |
dropout=0, kernel_size=5, distribute_zero_label=False)
|
|
|
31 |
"""
|
32 |
MODELS = {
|
33 |
'autoregressive.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/autoregressive.pth',
|
34 |
+
'classifier.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/classifier.pth',
|
35 |
'clvp.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/clvp.pth',
|
36 |
'cvvp.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/cvvp.pth',
|
37 |
'diffusion_decoder.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/diffusion_decoder.pth',
|
|
|
154 |
:param clip: torch tensor containing audio waveform data (get it from load_audio)
|
155 |
:return: True if the clip was classified as coming from Tortoise and false if it was classified as real.
|
156 |
"""
|
157 |
+
download_models(['classifier.pth'])
|
158 |
classifier = AudioMiniEncoderWithClassifierHead(2, spec_dim=1, embedding_dim=512, depth=5, downsample_factor=4,
|
159 |
resnet_blocks=2, attn_blocks=4, num_attn_heads=4, base_channels=32,
|
160 |
dropout=0, kernel_size=5, distribute_zero_label=False)
|
results/.gitattributes
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
favorite_riding_hood.mp3 filter=lfs diff=lfs merge=lfs -text
|
2 |
+
favorites filter=lfs diff=lfs merge=lfs -text
|
3 |
+
riding_hood filter=lfs diff=lfs merge=lfs -text
|
4 |
+
tacotron_comparison filter=lfs diff=lfs merge=lfs -text
|
5 |
+
various filter=lfs diff=lfs merge=lfs -text
|