Spaces:
Runtime error
Runtime error
File size: 1,942 Bytes
6d7e145 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import sys
import asyncio
from io import BytesIO
from fairseq import checkpoint_utils
import torch
import edge_tts
import librosa
# https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/blob/main/config.py#L43-L55 # noqa
def has_mps() -> bool:
    """Report whether the PyTorch "mps" device is actually usable.

    Returns:
        False when not running on macOS (``sys.platform != "darwin"``) or
        when this PyTorch build reports no MPS support; otherwise True only
        if a one-element tensor can really be moved to the "mps" device.
    """
    if sys.platform != "darwin":
        return False

    # Prefer torch.backends.mps: the top-level `torch.has_mps` flag is
    # deprecated in newer PyTorch releases. Fall back to the legacy flag on
    # builds that predate the backends module.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and hasattr(mps_backend, "is_built"):
        if not mps_backend.is_built():
            return False
    elif not getattr(torch, "has_mps", False):
        return False

    # Being built with MPS does not guarantee a working device, so probe it.
    # Any failure here is deliberately treated as "not available".
    try:
        torch.zeros(1).to(torch.device("mps"))
    except Exception:
        return False
    return True
def is_half(device: str) -> bool:
    """Decide whether a model on *device* should use half precision.

    Args:
        device: Device string such as ``"cpu"``, ``"cuda"`` or ``"cuda:0"``.

    Returns:
        False for any device whose name does not start with ``"cuda"``, and
        for CUDA GPUs whose reported name matches the exclusion list below;
        True for every other CUDA GPU.

    Raises:
        ValueError: If the text after the last ``:`` is not an integer.
    """
    if not device.startswith('cuda'):
        return False

    # A bare "cuda" has no index part; int("cuda") used to raise ValueError.
    # Passing None makes torch query the current CUDA device instead.
    _, separator, index_text = device.rpartition(':')
    device_index = int(index_text) if separator else None
    gpu_name = torch.cuda.get_device_name(device_index).upper()

    # Plain substring heuristics on the reported GPU name.
    # NOTE(review): '16' matches any name containing "16"; presumably aimed
    # at 16-series cards, with V100 (e.g. "V100 ... 16GB") explicitly
    # exempted -- confirm against the upstream config this was copied from.
    if '16' in gpu_name and 'V100' not in gpu_name:
        return False
    full_precision_markers = ('P40', '1060', '1070', '1080')
    return not any(marker in gpu_name for marker in full_precision_markers)
def load_hubert_model(device: str, model_path: str = 'hubert_base.pt'):
    """Load a checkpoint with fairseq and place it on *device*.

    Args:
        device: Device string handed to ``.to()`` and to ``is_half``.
        model_path: Checkpoint file to load; defaults to ``hubert_base.pt``.

    Returns:
        The first model from the first item returned by
        ``checkpoint_utils.load_model_ensemble_and_task``, moved to *device*
        and cast with ``.half()`` when ``is_half(device)`` is true, otherwise
        with ``.float()``.
    """
    loaded = checkpoint_utils.load_model_ensemble_and_task([model_path])
    # loaded[0] is the collection of models; only its first entry is used.
    hubert = loaded[0][0].to(device)
    return hubert.half() if is_half(device) else hubert.float()
async def call_edge_tts(speaker_name: str, text: str):
    """Synthesize *text* with edge-tts and decode the result with librosa.

    The audio chunks streamed by edge-tts are gathered in memory, piped
    through an ``ffmpeg`` subprocess that converts mp3 to wav, and the wav
    bytes are handed to ``librosa.load``.

    Args:
        speaker_name: Voice identifier passed to ``edge_tts.Communicate``.
        text: Text to synthesize.

    Returns:
        Whatever ``librosa.load`` returns for the converted wav data
        (presumably an ``(audio, sample_rate)`` pair -- see librosa docs).

    Raises:
        RuntimeError: If ``ffmpeg`` exits with a non-zero status.
    """
    tts_com = edge_tts.Communicate(text, speaker_name)

    # Stream TTS audio to bytes. Chunks are collected and joined once rather
    # than grown with `+=`, which re-copies the whole buffer on every chunk.
    audio_chunks = [
        chunk['data']
        async for chunk in tts_com.stream()
        if chunk['type'] == 'audio'
    ]
    tts_raw = b''.join(audio_chunks)

    # Convert mp3 stream to wav
    ffmpeg_proc = await asyncio.create_subprocess_exec(
        'ffmpeg',
        '-f', 'mp3',
        '-i', '-',
        '-f', 'wav',
        '-',
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE
    )
    (tts_wav, _) = await ffmpeg_proc.communicate(tts_raw)

    # Fail here with a clear message instead of letting librosa choke on
    # empty or truncated output further down. ffmpeg's own diagnostics still
    # go to this process's stderr, which is not redirected.
    if ffmpeg_proc.returncode != 0:
        raise RuntimeError(
            f'ffmpeg exited with status {ffmpeg_proc.returncode} '
            'while converting TTS audio to wav'
        )
    return librosa.load(BytesIO(tts_wav))
|