Spaces:
Configuration error
Configuration error
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import json | |
import os | |
import tempfile | |
import shutil | |
import requests | |
from pathlib import Path | |
# Scratch directory created when the script starts. Nothing visible in this
# script writes into it; it is removed when the object is cleaned up.
temp_dir = tempfile.TemporaryDirectory()
# NOTE: no module-level `global` declarations are needed for the uploaded-file
# paths: `global` only has an effect inside a function body, and names assigned
# at module scope are already global.
################################################### | |
from utils.hparams import hparams | |
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import IPython.display as ipd | |
import utils | |
import librosa | |
import torchcrepe | |
from infer import * | |
import logging | |
from infer_tools.infer_tool import * | |
import io | |
import parselmouth | |
# Module-level flag: set to True by render_audio() once a render has finished.
clip_completed = False


def render_audio(
    ckpt_temp_file,
    config_temp_file,
    audio_temp_file,
    title,
    title2,
    title3,
    choice,
    noise_step,
    use_mel_as_base,
):
    """Render the input audio with a Diff-SVC model and play the result.

    The rendered audio is written to ``que.wav`` in the current working
    directory and then shown in the page with ``st.audio``.

    Args:
        ckpt_temp_file: Path of the model checkpoint, passed to ``Svc``.
        config_temp_file: Path of the model config, passed to ``Svc``.
        audio_temp_file: Path of the input audio, passed to ``run_clip``.
        title: Pitch adjustment in semitones (may be negative); coerced to
            ``int`` and passed to ``run_clip`` as ``key``.
        title2: Speed-up value; coerced to ``int`` and passed to ``run_clip``
            as ``acc``.
        title3: Formant shift ratio; coerced to ``float``. A value of 1.0
            skips the Praat "Change gender" step.
        choice: Passed to ``run_clip`` as ``use_crepe``.
        noise_step: Passed to ``run_clip`` as ``add_noise_step``.
        use_mel_as_base: Passed to ``run_clip`` as ``use_gt_mel``.
    """
    global clip_completed

    logging.getLogger('numba').setLevel(logging.WARNING)
    key = int(title)
    speedup = int(title2)
    formant_shift_ratio = float(title3)

    project_name = "Unnamed"
    hubert_gpu = True
    svc_model = Svc(project_name, config_temp_file, hubert_gpu, ckpt_temp_file)
    print('model loaded')

    wav_gen = 'que.wav'

    # Keep the spinner visible for the whole render, including post-processing.
    with st.spinner("Rendering Audio..."):
        # `choice` (the "Use Crepe" checkbox) selects the pitch extractor; the
        # previous code referenced an undefined name here instead.
        run_clip(
            svc_model,
            file_path=audio_temp_file,
            key=key,
            acc=speedup,
            use_crepe=choice,
            use_pe=False,
            thre=0.05,
            use_gt_mel=use_mel_as_base,
            add_noise_step=noise_step,
            project_name=project_name,
            out_path=wav_gen,
        )

        # Optional formant shift via Praat's "Change gender" command; the
        # rendered file is overwritten in place.
        if formant_shift_ratio != 1.0:
            # Imported locally so the block does not rely on a star import
            # happening to expose `call`.
            from parselmouth.praat import call

            sound = parselmouth.Sound(wav_gen)
            print(wav_gen)
            print(sound.sampling_frequency)
            print(formant_shift_ratio)
            manipulated_sound = call(
                sound, "Change gender", 75, 500, formant_shift_ratio, 0, 1, 1
            )
            print(manipulated_sound)
            manipulated_sound.save(wav_gen, "WAV")
            print("Gender correct!")

    # The flag used to stay False forever, so the player was never shown.
    clip_completed = True
    st.audio(wav_gen)
####################################################### | |
def _save_upload(uploaded_file, suffix):
    """Write a Streamlit upload to a named temporary file and return its path.

    Returns None when nothing has been uploaded yet. The file is created with
    ``delete=False`` so it outlives the ``with`` block and can be read later.
    """
    if uploaded_file is None:
        return None
    with tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as temp:
        temp.write(uploaded_file.getvalue())
    print(temp.name)
    st.success("File saved to: {}".format(temp.name))
    return temp.name


st.set_page_config(
    page_title="DiffSVC Render",
    page_icon="🧊",
    initial_sidebar_state="expanded",
)
############
col1, col2 = st.columns(2)
col1.title('DIFF-SVC Render')
col2.title('Settings')

# Uploads (left column). Each path is None until the matching file is provided.
ckpt = col1.file_uploader("Choose your CKPT", type='ckpt')
ckpt_temp_file = _save_upload(ckpt, '.ckpt')

config = col1.file_uploader("Choose your config", type='yaml')
config_temp_file = _save_upload(config, '.yaml')

audio = col1.file_uploader("Choose your audio", type=["wav"])
audio_temp_file = _save_upload(audio, '.wav')

# Render settings (right column).
title = col2.number_input("Key", value=0, step=1, min_value=-12, max_value=12)
title2 = col2.number_input("Speedup", value=20, step=1, min_value=5, max_value=100)
title3 = col2.number_input("Gender Flag", value=1.00, step=0.01, min_value=0.70, max_value=1.30, help='Default is 1.0, it works by decimals, setting it at 1.05 will make your render sound more female-ish, setting it to 0.95 will make it sound more masculine, for example.')
choice = col2.checkbox('Use Crepe', value=False)
use_mel_as_base = col2.checkbox('Use Mel as Base', value=False, help='gt mel: Enabling this will use the input audio as a base and will unlock a new parameter, do not use this if you dont know what it does.')

# "Noise Step" is only shown when "Use Mel as Base" is checked; otherwise the
# default of 600 is used.
noise_step = 600
if use_mel_as_base:
    noise_step = col2.number_input('Noise Step', value=600, min_value=1, max_value=1000, step=50)

# Masked input so the gate password is not displayed in clear text.
password = col2.text_input("Enter password", type="password", help='Password can be got by agreeing to TOS and getting allowed after validation, you can go to the TOS here:')
correct_password = os.environ.get("gatepassword")
###
if st.button("Render audio"):
    if password != correct_password:
        st.error("Incorrect password")
    elif None in (ckpt_temp_file, config_temp_file, audio_temp_file):
        # Without this guard a missing upload crashed the script.
        st.error("Please upload a checkpoint, a config and an audio file before rendering.")
    else:
        # Keyword arguments: the previous positional call passed `noise_step`
        # and `choice` in swapped positions relative to the signature.
        render_audio(
            ckpt_temp_file,
            config_temp_file,
            audio_temp_file,
            title=title,
            title2=title2,
            title3=title3,
            choice=choice,
            noise_step=noise_step,
            use_mel_as_base=use_mel_as_base,
        )