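# RVC TTS Gradio demo (Pavloh): synthesizes speech with edge-tts, then re-voices it with an RVC voice model.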
import os
import json
import torch
import asyncio
import librosa
import hashlib
import edge_tts
import gradio as gr
from config import Config
from vc_infer_pipeline import VC
from fairseq import checkpoint_utils
from lib.infer_pack.models import (SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono, SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono,)
config = Config()
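# --- Model metadata helpers ---
# Read a UTF-8 JSON file and return its parsed contents.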
def load_json_file(filepath):
with open(filepath, "r", encoding="utf-8") as f: content = json.load(f)
return content
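# MD5 checksum of a file, used to detect changed .pth/.index files.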
def file_checksum(file_path):
with open(file_path, 'rb') as f:
file_data = f.read()
return hashlib.md5(file_data).hexdigest()
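# Load a category's cached model_info.json if it exists, otherwise return None.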
def get_existing_model_info(category_directory):
model_info_path = os.path.join(category_directory, 'model_info.json')
if os.path.exists(model_info_path):
with open(model_info_path, 'r') as f: return json.load(f)
return None
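# Scan models/<category>/<model> folders and (re)write model_info.json and folder_info.json
# whenever a model's .pth or .index checksum has changed.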
def generate_model_info_files():
folder_info = {}
model_directory = "models/"
for category_name in os.listdir(model_directory):
category_directory = os.path.join(model_directory, category_name)
if not os.path.isdir(category_directory): continue
folder_info[category_name] = {"title": category_name, "folder_path": category_name}
existing_model_info = get_existing_model_info(category_directory)
model_info = {}
regenerate_model_info = False
for model_name in os.listdir(category_directory):
model_path = os.path.join(category_directory, model_name)
if not os.path.isdir(model_path): continue
model_data, regenerate = gather_model_info(category_directory, model_name, model_path, existing_model_info)
if model_data is not None:
model_info[model_name] = model_data
regenerate_model_info |= regenerate
if regenerate_model_info:
with open(os.path.join(category_directory, 'model_info.json'), 'w') as f: json.dump(model_info, f, indent=4)
folder_info_path = os.path.join(model_directory, 'folder_info.json')
with open(folder_info_path, 'w') as f: json.dump(folder_info, f, indent=4)
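# A model entry is regenerated when it is missing from the cache or either checksum no longer matches.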
def should_regenerate_model_info(existing_model_info, model_name, pth_checksum, index_checksum):
if existing_model_info is None or model_name not in existing_model_info: return True
return (existing_model_info[model_name]['model_path_checksum'] != pth_checksum or existing_model_info[model_name]['index_path_checksum'] != index_checksum)
def get_model_files(model_path): return [f for f in os.listdir(model_path) if f.endswith('.pth') or f.endswith('.index')]
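# Collect metadata for one model folder; expects exactly one .pth and one .index file.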
def gather_model_info(category_directory, model_name, model_path, existing_model_info):
model_files = get_model_files(model_path)
if len(model_files) != 2: return None, False
pth_file = [f for f in model_files if f.endswith('.pth')][0]
index_file = [f for f in model_files if f.endswith('.index')][0]
pth_checksum = file_checksum(os.path.join(model_path, pth_file))
index_checksum = file_checksum(os.path.join(model_path, index_file))
regenerate = should_regenerate_model_info(existing_model_info, model_name, pth_checksum, index_checksum)
return {"title": model_name, "model_path": pth_file, "feature_retrieval_library": index_file, "model_path_checksum": pth_checksum, "index_path_checksum": index_checksum}, regenerate
def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
def vc_fn(tts_text, tts_voice):
try:
if tts_text is None or tts_voice is None: return None
if len(tts_text) > 100: return None
asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
vc_input = "tts.mp3"
times = [0, 0, 0]
audio_opt = vc.pipeline(hubert_model, net_g, 0, audio, vc_input, times, 0, "pm", file_index, 0.7, if_f0, 3, tgt_sr, 0, 1, version, 0.5, f0_file=None)
return (tgt_sr, audio_opt)
except Exception: return None
return vc_fn
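# Resolve the .index path and load the model checkpoint onto the CPU.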
def load_model_parameters(category_folder, character_name, info):
model_index = f"models/{category_folder}/{character_name}/{info['feature_retrieval_library']}"
cpt = torch.load(f"models/{category_folder}/{character_name}/{info['model_path']}", map_location="cpu")
return model_index, cpt
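# Pick the synthesizer class matching the checkpoint version (v1/v2) and whether it uses F0 (pitch).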
def select_net_g(cpt, version, if_f0):
if version == "v1":
if if_f0 == 1: net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
else: net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif version == "v2":
if if_f0 == 1: net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
else: net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
return net_g
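# Drop the training-only posterior encoder, load the weights, and move the model to the configured device and precision.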
def load_model_and_prepare(cpt, net_g):
del net_g.enc_q
net_g.load_state_dict(cpt["weight"], strict=False)
net_g.eval().to(config.device)
net_g = net_g.half() if config.is_half else net_g.float()
return net_g
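# Register a loaded model in both the category list and the name -> function lookup table.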
def create_and_append_model(models, model_functions, character_name, model_title, version, vc_fn):
models.append((character_name, model_title, version, vc_fn))
model_functions[character_name] = vc_fn
return models, model_functions
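# Load every model listed in folder_info.json / model_info.json, build its synthesizer and VC pipeline,
# and expose a conversion function per character.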
def load_model():
categories = []
model_functions = {}
folder_info = load_json_file("models/folder_info.json")
for category_name, category_info in folder_info.items():
models = []
models_info = load_json_file(f"models/{category_info['folder_path']}/model_info.json")
for character_name, info in models_info.items():
model_index, cpt = load_model_parameters(category_info['folder_path'], character_name, info)
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
net_g = select_net_g(cpt, cpt.get("version", "v1"), cpt.get("f0", 1))
net_g = load_model_and_prepare(cpt, net_g)
vc = VC(cpt["config"][-1], config)
vc_fn = create_vc_fn(info['model_path'], cpt["config"][-1], net_g, vc, cpt.get("f0", 1), cpt.get("version", "v1"), model_index)
models, model_functions = create_and_append_model(models, model_functions, character_name, info['title'], cpt.get("version", "v1"), vc_fn)
categories.append([category_info['title'], category_info['folder_path'], models])
return categories, model_functions
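# Refresh model metadata on startup.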
generate_model_info_files()
css = """
.gradio-container { font-family: 'IBM Plex Sans', sans-serif; }
footer { visibility: hidden; display: none; }
.center-container { display: flex; flex-direction: column; align-items: center; justify-content: center;}
"""
if __name__ == '__main__':
global hubert_model
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"], suffix="")
hubert_model = models[0]
hubert_model = hubert_model.to(config.device)
hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
hubert_model.eval()
categories, model_functions = load_model()
tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
with gr.Blocks(css=css, title="Demo RVC TTS - Pavloh", theme=gr.themes.Soft(primary_hue="cyan", secondary_hue="blue", radius_size="lg", text_size="lg")
.set(loader_color="#0B0F19", shadow_drop='*shadow_drop_lg', block_border_width="3px")) as pavloh:
gr.HTML("""
<div class="center-container">
<div style="display: flex; justify-content: center;">
<a href="https://github.com/ImPavloh/rvc-tts/blob/main/LICENSE" target="_blank">
<img src="https://img.shields.io/github/license/impavloh/voiceit?style=for-the-badge&logo=github&logoColor=white" alt="License">
</a>
<a href="https://github.com/ImPavloh/rvc-tts" target="_blank">
<img src="https://img.shields.io/badge/repository-%23121011.svg?style=for-the-badge&logo=github&logoColor=white" alt="GitHub">
</a>
<form action="https://www.paypal.com/donate" method="post" target="_blank">
<input type="hidden" name="hosted_button_id" value="6FPWP9AWEKSWJ" />
<input type="image" src="https://img.shields.io/badge/support-%2300457C.svg?style=for-the-badge&logo=paypal&logoColor=white" border="0" name="submit" alt="Donate with PayPal" />
<img alt="" border="0" src="https://www.paypal.com/es_ES/i/scr/pixel.gif" width="1" height="1" />
</form>
<a href="https://twitter.com/impavloh" target="_blank">
<img src="https://img.shields.io/badge/follow-%231DA1F2.svg?style=for-the-badge&logo=twitter&logoColor=white" alt="Twitter">
</a>
</div>
<div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px">🗣️ RVC TTS Demo - <a style="text-decoration: underline;" href="https://twitter.com/impavloh">Pavloh</a></h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">An AI-Powered Text-to-Speech</p>
<p><b>Try out the <a style="text-decoration: underline;" href="https://github.com/ImPavloh/rvc-tts-discord-bot">RVC Text-to-Speech Discord Bot</a></b></p>
</div>
""")
with gr.Row():
with gr.Column():
m1 = gr.Dropdown(label="📦 Voice Model", choices=list(model_functions.keys()), allow_custom_value=False, value="Ibai")
t2 = gr.Dropdown(label="⚙️ Voice style and language [Edge-TTS]", choices=voices, allow_custom_value=False, value="es-ES-AlvaroNeural-Male")
t1 = gr.Textbox(label="📝 Text to convert")
c1 = gr.Button("Convert", variant="primary")
a1 = gr.Audio(label="🔉 Converted Audio", interactive=False)
def call_selected_model_fn(selected_model, t1, t2):
vc_fn = model_functions[selected_model]
return vc_fn(t1, t2)
c1.click(fn=call_selected_model_fn, inputs=[m1, t1, t2], outputs=[a1])
gr.HTML("""
<center>
<p><i> By using this website, you agree to the <a style="text-decoration: underline;" href="https://github.com/ImPavloh/rvc-tts/blob/main/LICENSE">license</a>. </i></p>
</center>
""")
pavloh.queue(concurrency_count=1).launch()