# Spaces: Runtime error
# File size: 2,900 Bytes
# 0374441
# StyleTTS 2 HTTP Streaming API by @fakerybakery - Copyright (c) 2023 mrfakename. All rights reserved.
# Docs: API_DOCS.md
# To-Do:
# * Support voice cloning
# * Implement authentication, user "credits" system w/ SQLite3
import io
import markdown
from tortoise.utils.text import split_and_recombine_text
from flask import Flask, Response, request, jsonify
import numpy as np
import ljinference
import torch
import hashlib
from scipy.io.wavfile import read, write
from flask_cors import CORS
import os
import torchaudio
def genHeader(sampleRate, bitsPerSample, channels, datasize=2000 * 10**6):
    """Build a 44-byte RIFF/WAVE header for uncompressed PCM audio.

    The header advertises ``datasize`` bytes of sample data. The default is a
    large placeholder (2,000,000,000 bytes), presumably so the header can be
    sent before the real payload length is known.

    Args:
        sampleRate: Samples per second.
        bitsPerSample: Bits per sample for a single channel.
        channels: Number of audio channels.
        datasize: Byte count written to the ``data`` chunk size field; the
            RIFF chunk size field is written as ``datasize + 36``.

    Returns:
        The header as a ``bytes`` object.

    Raises:
        OverflowError: If a value is negative or does not fit in its
            fixed-width little-endian field.
    """
    fields = (
        b"RIFF",
        (datasize + 36).to_bytes(4, "little"),  # bytes following this field
        b"WAVE",
        b"fmt ",
        (16).to_bytes(4, "little"),  # size of the fmt chunk body
        (1).to_bytes(2, "little"),  # format tag 1 = PCM
        channels.to_bytes(2, "little"),
        sampleRate.to_bytes(4, "little"),
        (sampleRate * channels * bitsPerSample // 8).to_bytes(4, "little"),  # byte rate
        (channels * bitsPerSample // 8).to_bytes(2, "little"),  # block align
        bitsPerSample.to_bytes(2, "little"),
        b"data",
        datasize.to_bytes(4, "little"),
    )
    # Join once instead of growing a bytes object with repeated +=.
    return b"".join(fields)
import phonemizer
# Espeak-backed phonemizer for US English that keeps punctuation and stress marks.
# NOTE(review): not referenced by any other code visible in this file — confirm it is still needed.
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
print("Starting Flask app")
# Flask application object; the @app.route decorators in this file register handlers on it.
app = Flask(__name__)
# Wrap the app with flask_cors using its default settings.
cors = CORS(app)
@app.route("/")
def index():
    """Serve the API documentation as HTML.

    Reads ``API_DOCS.md`` (resolved against the current working directory)
    and returns its contents converted from Markdown to HTML.

    Raises:
        OSError: If ``API_DOCS.md`` cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open('API_DOCS.md', 'r', encoding='utf-8') as f:
        return markdown.markdown(f.read())
# Directory where synthesized audio chunks are stored as WAV files for reuse.
cache_dir = 'cache'
# exist_ok=True already tolerates an existing directory, so a separate
# os.path.exists() check is redundant (and racy between check and creation).
os.makedirs(cache_dir, exist_ok=True)
@app.route("/api", methods=['GET', 'POST'])
def serve_wav():
    """Synthesize speech for the requested text and return it as WAV audio.

    The ``text`` field is read from the query string (GET) or the form body
    (POST); if it is absent there, a JSON body with a ``text`` key is used
    instead. The text is split into chunks, each chunk is synthesized (or
    loaded from ``cache_dir`` when a WAV file named after the SHA-256 of the
    chunk already exists), and the chunks are concatenated into one WAV
    written at a 24000 Hz sample rate.

    Returns:
        A response with ``Content-Type: audio/wav`` on success, or a JSON
        error with status 400 when ``text`` is missing, is not a string, or
        contains nothing to synthesize.
    """
    error_response = {'error': 'Missing required fields. Please include "text" in your request.'}
    sample_rate = 24000

    # GET carries parameters in the query string, POST in the form body.
    # Pick the right source instead of overwriting request.form.
    params = request.args if request.method == 'GET' else request.form
    if 'text' in params:
        text = params['text']
    else:
        # silent=True returns None rather than raising when the body is
        # missing, has a non-JSON content type, or fails to parse.
        payload = request.get_json(silent=True)
        text = payload.get('text') if isinstance(payload, dict) else None

    if not isinstance(text, str):
        return jsonify(error_response), 400
    text = text.strip()
    if not text:
        return jsonify(error_response), 400

    texts = split_and_recombine_text(text)
    if not texts:
        # np.concatenate() raises on an empty list; report a client error instead.
        return jsonify(error_response), 400

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # One noise tensor is shared by every chunk of this request.
    noise = torch.randn(1, 1, 256).to(device)

    audios = []
    for t in texts:
        # Cache key: SHA-256 of the chunk text.
        digest = hashlib.sha256(t.encode()).hexdigest()
        cache_path = os.path.join(cache_dir, digest + '.wav')
        if os.path.exists(cache_path):
            # scipy's read() returns (rate, data); only the samples are needed.
            audios.append(read(cache_path)[1])
        else:
            aud = ljinference.inference(t, noise, diffusion_steps=7, embedding_scale=1)
            write(cache_path, sample_rate, aud)
            audios.append(aud)

    output_buffer = io.BytesIO()
    write(output_buffer, sample_rate, np.concatenate(audios))
    response = Response(output_buffer.getvalue())
    response.headers["Content-Type"] = "audio/wav"
    return response
if __name__ == "__main__":
    # Listen on all network interfaces rather than only localhost.
    app.run("0.0.0.0")