"""Streamlit front end that renders an uploaded audio file with a Diff-SVC model.

The page collects a checkpoint, a config and a WAV file, a handful of render
settings and a gate password, then calls ``render_audio`` when the user
presses "Render audio".
"""
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import hmac
import json
import os
import tempfile
import shutil
import requests
from pathlib import Path

temp_dir = tempfile.TemporaryDirectory()

###################################################
from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import utils
import librosa
import torchcrepe
from infer import *
import logging
from infer_tools.infer_tool import *
import io
import parselmouth

# Paths of the uploaded files on disk; each stays None until its upload exists.
ckpt_temp_file = None
audio_temp_file = None
config_temp_file = None

# File the rendered audio is written to (and re-written by the formant shift).
OUTPUT_WAV = 'que.wav'


def _save_upload(uploaded, suffix):
    """Write an uploaded file to a named temporary file and return its path.

    Args:
        uploaded: The value returned by a Streamlit file uploader, or None
            when nothing has been uploaded yet.
        suffix: File extension (including the dot) for the temporary file.

    Returns:
        The temporary file's path, or None when ``uploaded`` is None.
    """
    if uploaded is None:
        return None
    # delete=False keeps the file on disk after the handle closes so the
    # render step can reopen it by path.
    # NOTE(review): these files are never removed, so each rerun leaves one behind.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as temp:
        temp.write(uploaded.getvalue())
        saved_path = temp.name
    print(saved_path)
    st.success("File saved to: {}".format(saved_path))
    return saved_path


def _password_matches(entered, expected):
    """Return True when ``entered`` equals ``expected``.

    An unset expected password (None) never matches, so the gate stays closed
    when the environment variable is missing. The comparison is constant-time.
    """
    if expected is None:
        return False
    return hmac.compare_digest(entered.encode("utf-8"), expected.encode("utf-8"))


def _apply_formant_shift(wav_path, ratio):
    """Run Praat's "Change gender" on ``wav_path`` in place with ``ratio``."""
    # Local import: the file only imports the top-level parselmouth module.
    from parselmouth.praat import call

    sound = parselmouth.Sound(wav_path)
    print(wav_path)
    print(sound.sampling_frequency)
    print(ratio)
    manipulated_sound = call(sound, "Change gender", 75, 500, ratio, 0, 1, 1)
    print(manipulated_sound)
    manipulated_sound.save(wav_path, "WAV")
    print("Gender correct!")


def render_audio(ckpt_temp_file, config_temp_file, audio_temp_file, title, title2, title3, choice, noise_step, use_mel_as_base):
    """Render the uploaded audio with the uploaded model and play the result.

    Args:
        ckpt_temp_file: Path of the model checkpoint.
        config_temp_file: Path of the model config.
        audio_temp_file: Path of the input WAV file.
        title: Key shift in semitones (may be negative); converted with ``int``.
        title2: Speedup factor passed to ``run_clip`` as ``acc``; converted with ``int``.
        title3: Formant shift ratio; 1.0 leaves the render untouched.
        choice: Whether to use Crepe (passed as ``use_crepe``).
        noise_step: Passed to ``run_clip`` as ``add_noise_step``.
        use_mel_as_base: Passed to ``run_clip`` as ``use_gt_mel``.
    """
    logging.getLogger('numba').setLevel(logging.WARNING)

    key = int(title)  # pitch adjustment, positive or negative (semitones)
    speedup = int(title2)  # speedup factor
    formant_shift_ratio = float(title3)

    project_name = "Unnamed"
    hubert_gpu = True
    svc_model = Svc(project_name, config_temp_file, hubert_gpu, ckpt_temp_file)
    print('model loaded')

    # Keep the spinner visible for the whole render, including the optional
    # Praat post-processing step.
    with st.spinner("Rendering Audio..."):
        run_clip(svc_model, file_path=audio_temp_file, key=key, acc=speedup,
                 use_crepe=choice, use_pe=False, thre=0.05,
                 use_gt_mel=use_mel_as_base, add_noise_step=noise_step,
                 project_name=project_name, out_path=OUTPUT_WAV)

        # A ratio of exactly 1.0 means "no change", so skip Praat entirely.
        if formant_shift_ratio != 1.0:
            _apply_formant_shift(OUTPUT_WAV, formant_shift_ratio)

    st.audio(OUTPUT_WAV)


#######################################################
st.set_page_config(
    page_title="DiffSVC Render",
    page_icon="🧊",
    initial_sidebar_state="expanded",
)
############
col1, col2 = st.columns(2)
col1.title('DIFF-SVC Render')
col2.title('Settings')

ckpt = col1.file_uploader("Choose your CKPT", type='ckpt')
ckpt_temp_file = _save_upload(ckpt, '.ckpt')

config = col1.file_uploader("Choose your config", type='yaml')
config_temp_file = _save_upload(config, '.yaml')

audio = col1.file_uploader("Choose your audio", type=["wav"])
audio_temp_file = _save_upload(audio, '.wav')

title = col2.number_input("Key", value=0, step=1, min_value=-12, max_value=12)
title2 = col2.number_input("Speedup", value=20, step=1, min_value=5, max_value=100)
title3 = col2.number_input("Gender Flag", value=1.00, step=0.01, min_value=0.70, max_value=1.30, help='Default is 1.0, it works by decimals, setting it at 1.05 will make your render sound more female-ish, setting it to 0.95 will make it sound more masculine, for example.')
choice = col2.checkbox('Use Crepe', value=False)
use_mel_as_base = col2.checkbox('Use Mel as Base', value=False, help='gt mel: Enabling this will use the input audio as a base and will unlock a new parameter, do not use this if you dont know what it does.')

# The "Noise Step" input is only shown when "Use Mel as Base" is checked.
noise_step = 600
if use_mel_as_base:
    noise_step = col2.number_input('Noise Step', value=600, min_value=1, max_value=1000, step=50)

password = col2.text_input("Enter password", help='Password can be got by agreeing to TOS and getting allowed after validation, you can go to the TOS here:')
correct_password = os.environ.get("gatepassword")

###
if st.button("Render audio"):
    if not _password_matches(password, correct_password):
        st.error("Incorrect password")
    elif None in (ckpt_temp_file, config_temp_file, audio_temp_file):
        st.error("Please upload a checkpoint, a config and an audio file before rendering.")
    else:
        render_audio(ckpt_temp_file, config_temp_file, audio_temp_file, title, title2, title3, choice, noise_step, use_mel_as_base)