"""Streamlit front end for an Azerbaijani text-to-speech demo built on Piper.

On start-up the script makes sure the Piper binary and the ONNX voice models
are available locally (downloading whatever is missing), then renders a form
where the user picks a model, enters text and a speed factor, and receives
the synthesized WAV audio.
"""
import os
import subprocess
import sys
import tarfile
import tempfile
from os.path import exists

import requests
import streamlit as st
from huggingface_hub import HfFolder
from PIL import Image

HF_TOKEN = os.environ.get("HF_TOKEN")

# Set base path
BASE_PATH = os.getcwd()  # /home/user/app
BASE_PATH_MODEL = os.path.join(BASE_PATH, "Model")

# Piper TTS download url
URL_PIPER_DOWNLOAD = "https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_amd64.tar.gz"

# TTS model files
URL_TTS_ONNX = "https://huggingface.co/spaces/CavidanZ/TTS-azerbaijani-model/resolve/main/last.onnx"
URL_TTS_ONNX_2 = "https://huggingface.co/spaces/CavidanZ/TTS-azerbaijani-model/resolve/main/last2.onnx"
URL_TTS_FH = "https://huggingface.co/spaces/CavidanZ/TTS-azerbaijani-model/resolve/main/last_FH.onnx"
URL_TACOTRON2 = "https://huggingface.co/spaces/CavidanZ/TTS-azerbaijani-model/resolve/main/Audiobook_based"

TMP_PIPER_FILENAME = os.path.join(BASE_PATH, "piper.tgz")

# Only send an Authorization header when a token is actually configured;
# otherwise the literal string "Bearer None" would be sent.
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# (connect, read) timeouts in seconds so a stalled download cannot hang the app.
DOWNLOAD_TIMEOUT = (10, 300)
DOWNLOAD_CHUNK_SIZE = 1 << 20

# Path of the Piper executable inside the extracted release archive.
PIPER_CMD = os.path.join(BASE_PATH, "piper", "piper")
SPEAKER_ID = "0"

# (remote URL, local file name) for every voice model; each model also has a
# companion "<name>.json" config that is fetched from "<url>.json".
MODEL_DOWNLOADS = (
    (URL_TTS_ONNX, "last.onnx"),
    (URL_TTS_ONNX_2, "last2.onnx"),
    (URL_TTS_FH, "last_FH.onnx"),
)

# UI label -> model file used for synthesis.
# NOTE(review): "VITS Model 1" maps to last2.onnx and "VITS Model 2" to
# last.onnx, exactly as in the original code. This looks swapped but may be
# deliberate - confirm with the model owner. last_FH.onnx is downloaded but
# not offered in the UI.
MODEL_BY_OPTION = {
    "VITS Model 1": "last2.onnx",
    "VITS Model 2": "last.onnx",
}


def _download_file(url, destination, request_headers=None, description="TTS"):
    """Stream *url* into *destination* and report failures in the UI.

    The payload is written to "<destination>.part" first and renamed only
    after the transfer completed, so an interrupted download never leaves a
    truncated file that later looks like a valid one.

    Args:
        url: Address to fetch.
        destination: Local path the file is stored at.
        request_headers: Optional HTTP headers (e.g. authorization).
        description: Short label used in the failure message.

    Returns:
        True when the file was downloaded, False otherwise.
    """
    partial_path = destination + ".part"
    try:
        with requests.get(
            url,
            headers=request_headers,
            stream=True,
            timeout=DOWNLOAD_TIMEOUT,
        ) as response:
            if response.status_code != 200:
                st.markdown(
                    f"Failed to download {description} from {url} "
                    f"(Status code: {response.status_code})"
                )
                return False
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    f.write(chunk)
    except (requests.RequestException, OSError) as err:
        st.markdown(f"Failed to download {description} from {url} ({err})")
        if os.path.exists(partial_path):
            os.remove(partial_path)
        return False
    os.replace(partial_path, destination)
    return True


def _ensure_piper():
    """Download and extract the Piper release unless it is already present."""
    if os.path.exists(os.path.join(BASE_PATH, "piper")):
        return
    # The GitHub release is public; the Hugging Face token is not sent to it.
    if _download_file(URL_PIPER_DOWNLOAD, TMP_PIPER_FILENAME, description="Piper TTS"):
        with tarfile.open(TMP_PIPER_FILENAME, "r:gz") as tar:
            tar.extractall(BASE_PATH)


def _ensure_models():
    """Download every model file (and its JSON config) that is missing locally."""
    os.makedirs(BASE_PATH_MODEL, exist_ok=True)
    for model_url, model_name in MODEL_DOWNLOADS:
        wanted = (
            (model_url, model_name, "TTS"),
            (model_url + ".json", model_name + ".json", "TTS json"),
        )
        for url, name, description in wanted:
            target = os.path.join(BASE_PATH_MODEL, name)
            # Streamlit re-runs this script on every interaction, so skip
            # files that have already been fetched.
            if not os.path.exists(target):
                _download_file(url, target, headers, description)


def _synthesize(text, model_file, length_scale):
    """Run Piper on *text* and return the generated WAV file as bytes.

    The text is passed on stdin and the command is given as an argument
    list, so user input is never interpreted by a shell.

    Args:
        text: Text to speak.
        model_file: File name of the ONNX model inside BASE_PATH_MODEL.
        length_scale: Value forwarded to Piper's --length-scale option.

    Returns:
        The contents of the WAV file written by Piper.

    Raises:
        RuntimeError: If Piper exits with a non-zero status.
        OSError: If the Piper executable or the output file cannot be used.
    """
    # Piper writes to a path, so reserve a name and close our handle first.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    try:
        cmd = [
            PIPER_CMD,
            "--model", os.path.join(BASE_PATH_MODEL, model_file),
            "--speaker", SPEAKER_ID,
            "--output_file", wav_path,
            "--length-scale", str(length_scale),
        ]
        # The trailing newline mirrors what `echo` used to append.
        result = subprocess.run(
            cmd,
            input=(text + "\n").encode("utf-8"),
            capture_output=True,
        )
        if result.returncode != 0:
            details = result.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(
                f"Piper exited with status {result.returncode}: {details}"
            )
        with open(wav_path, "rb") as audio_file:
            return audio_file.read()
    finally:
        if os.path.exists(wav_path):
            os.remove(wav_path)


##########################
# CHECK OR INSTALL PIPER #
##########################
_ensure_piper()

#####################################################
# CHECK OR DOWNLOAD: TTS model files                #
#####################################################
_ensure_models()

###########################
# MODEL DOWNLOAD FINISHED #
###########################

hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

st.title('Hello!')
st.header('A text-to-speech (TTS) model in Azerbaijani language')

with st.form("my_form"):
    option = st.selectbox(
        'Choose the model.',
        tuple(MODEL_BY_OPTION))

    text = st.text_area("Text to generate audio from:", max_chars=500)

    # Default parameter values
    length_scale = st.slider('Choose the audio speed (Higher values are slower audios):', min_value = 0.000, max_value = 5.000, step = 0.001, value = 1.0, label_visibility="visible")

    submitted = st.form_submit_button("Submit")

# The result is rendered outside the form: Streamlit does not allow
# st.download_button inside st.form, which the previous blanket
# `except: pass` around the button presumably papered over.
if submitted:
    if not text.strip():
        st.warning("Please enter some text to generate audio from.")
    else:
        audio_bytes = None
        with st.spinner("Please, wait... :)"):
            try:
                audio_bytes = _synthesize(text, MODEL_BY_OPTION[option], length_scale)
            except (RuntimeError, OSError) as err:
                st.error(f"Audio generation failed: {err}")
        if audio_bytes:
            st.audio(audio_bytes, format="audio/wav")
            st.download_button('Download audio', audio_bytes, file_name='TTS-Voice.wav')