ava-1 / speech.py
GowthamYarlagadda's picture
Upload 304 files
b36e9ec verified
# from config import *
# from openai import OpenAI
# import os
# def openai_generate_speech(audiofile, voice, text):
# client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# response = client.audio.speech.create(
# model="tts-1",
# voice=voice,
# input=text
# )
# response.stream_to_file(audiofile)
import os
import torch
import torchaudio
import time
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voices
import humanize
import datetime as dt
def generate_speech(path_id, outfile, voice, text, speed="standard"):
    """Synthesize ``text`` with Tortoise TTS and save it as a 24 kHz audio file.

    The clip is written to ``temp/<path_id>/<outfile>``. The target directory
    is created when it does not exist yet.

    Args:
        path_id: Sub-directory, relative to ``temp``, that receives the output.
        outfile: File name of the clip inside that directory.
        voice: Comma-separated voice names. Names joined with ``&`` are passed
            together to a single ``load_voices`` call.
        text: Text to synthesize.
        speed: Preset name forwarded to ``tts_with_preset`` as ``preset``.

    Note:
        Every voice (and every returned candidate) is saved to the same path,
        so when several comma-separated voices are given only the clip that
        was generated last remains on disk.
    """
    out_path = os.path.join("temp", path_id, outfile)
    # Create the destination before loading the model so a bad path fails
    # fast instead of after the (slow) synthesis has already run.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    tts = TextToSpeech(kv_cache=True, half=True)
    for selected_voice in voice.split(','):
        # split('&') yields a one-element list when no '&' is present.
        voice_sel = selected_voice.split('&')
        voice_samples, conditioning_latents = load_voices(voice_sel)
        gen, _ = tts.tts_with_preset(
            text,
            k=1,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            return_deterministic_state=True,
            preset=speed,
        )
        # The result may be a single clip or a list of clips; handle both
        # through one code path.
        clips = gen if isinstance(gen, list) else [gen]
        for clip in clips:
            torchaudio.save(out_path, clip.squeeze(0).cpu(), 24000)
if __name__ == '__main__':
    # Demo run: synthesize a short news snippet and report the elapsed time.
    tstart = time.time()
    # Take the timestamp exactly once: the directory that is created and the
    # path_id handed to generate_speech must refer to the same run folder.
    run_dir = os.path.join("audio", str(int(tstart)))
    # generate_speech writes below "temp/<path_id>", so create that location.
    os.makedirs(os.path.join("temp", run_dir), exist_ok=True)
    message = """Apple today confirmed that it will be permanently closing its Infinite Loop retail store in
Cupertino, California on January 20. Infinite Loop served as Apple's headquarters between the mid-1990s and
2017, when its current Apple Park headquarters opened a few miles away."""
    generate_speech(run_dir, "christmas.wav", "train_grace",
                    message, "ultra_fast")
    # openai_generate_speech("speech.mp3", "onyx",
    # "Merry Christmas! May the holiday bring you endless joy, laughter, \
    # and quality time with friends and family!")
    print("total time:", humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - tstart))))