Spaces:
Runtime error
Runtime error
File size: 7,938 Bytes
d595cc9 d05c508 c275b63 d595cc9 c275b63 d595cc9 c275b63 bd79dbb d05c508 c275b63 d05c508 c275b63 bd79dbb c275b63 bd79dbb c275b63 bd79dbb d595cc9 bd79dbb d595cc9 bd79dbb d595cc9 bd79dbb c275b63 d05c508 c275b63 d05c508 bd79dbb d05c508 d595cc9 d05c508 bd79dbb c275b63 bd79dbb c275b63 bd79dbb c275b63 bd79dbb c275b63 bd79dbb c275b63 bd79dbb c275b63 bd79dbb c275b63 bd79dbb c275b63 bd79dbb c275b63 d05c508 c275b63 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import os
import uuid
import gradio as gr
from src.gradio_demo import SadTalker
from infer_onnx import TTS
from huggingface_hub import snapshot_download
# TTS model identifiers offered in the UI. Each one is instantiated eagerly
# at import time and stored under its identifier, so `models` maps
# identifier -> ready-to-call TTS instance.
models = {
    model_id: TTS(model_id)
    for model_id in (
        "TeraTTS/natasha-g2p-vits",
        "TeraTTS/glados2-g2p-vits",
        "TeraTTS/glados-g2p-vits",
        "TeraTTS/girl_nice-g2p-vits",
    )
}
# Speech synthesis helper
def text_to_speech(model_name, length_scale, text):
    """Synthesize ``text`` with the selected TTS model and save it as a WAV file.

    Args:
        model_name: Key into the module-level ``models`` dict.
        length_scale: Forwarded to the model call as ``length_scale``.
        text: Text to synthesize.

    Returns:
        Path of the written file: ``./results/voice_input/<uuid4>.wav``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    # Look the model up once instead of three separate times.
    tts = models[model_name]

    # Synthesize before touching the filesystem so that a failed synthesis
    # does not leave an empty placeholder .wav behind.
    audio = tts(text, length_scale=length_scale)

    save_dir = './results/voice_input'
    os.makedirs(save_dir, exist_ok=True)
    # A fresh UUID per call gives every request its own output file.
    file_name = os.path.join(save_dir, str(uuid.uuid4()) + '.wav')

    # Pre-create the (empty) target file, as the original code did, but via a
    # context manager so the handle is closed even if something goes wrong.
    with open(file_name, "wb"):
        pass

    tts.save_wav(audio, file_name, sample_rate=tts.config["samplerate"])
    return file_name
def get_source_image(image):
    """Return ``image`` unchanged (identity pass-through)."""
    return image
# Probe for a host-provided ``webui`` module and record the result in
# ``in_webui``. Only ImportError is treated as "not available"; a bare
# ``except`` would also swallow KeyboardInterrupt and SystemExit.
try:
    import webui  # noqa: F401 -- imported only to test availability
except ImportError:
    in_webui = False
else:
    in_webui = True
def toggle_audio_file(choice):
    """Build visibility updates for a pair of components from a checkbox value.

    Args:
        choice: Checkbox state. Evaluated by truthiness, so ``None`` counts
            as unchecked (the previous ``== False`` comparison treated
            ``None`` as checked).

    Returns:
        A 2-tuple of ``gr.update`` payloads: the first component is visible
        when ``choice`` is falsy, the second when ``choice`` is truthy.
    """
    checked = bool(choice)
    return gr.update(visible=not checked), gr.update(visible=checked)
def ref_video_fn(path_of_ref_video):
    """Return a ``gr.update`` whose value says whether a reference video is set.

    The value is ``True`` when ``path_of_ref_video`` is not ``None`` and
    ``False`` otherwise.
    """
    has_video = path_of_ref_video is not None
    return gr.update(value=has_video)
def download_model():
    """Fetch a snapshot of the 'vinthony/SadTalker-V002rc' repo into ./checkpoints.

    Calls ``snapshot_download`` with ``local_dir_use_symlinks=True`` and
    discards its return value.
    """
    snapshot_download(
        repo_id='vinthony/SadTalker-V002rc',
        local_dir='./checkpoints',
        local_dir_use_symlinks=True,
    )
def sadtalker_demo():
# Build and return the Gradio Blocks interface for the demo.
# Steps visible here: download checkpoints, create a SadTalker instance,
# declare the UI components, wire three event handlers, return the Blocks object.
# NOTE(review): leading indentation is missing in this copy of the file, so the
# nesting of the `with` layout contexts below cannot be confirmed from the text;
# restore it from the upstream file before running.
download_model()
# lazy_load=True is passed through to SadTalker; its effect is defined in src.gradio_demo.
sad_talker = SadTalker(lazy_load=True)
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
with gr.Row():
# --- Input panel: source image, driving audio / TTS, optional reference video ---
with gr.Column(variant='panel'):
with gr.Tabs(elem_id="sadtalker_source_image"):
with gr.TabItem('Исходное изображение'):
with gr.Row():
# type="filepath": the component's value is handed on as a file path.
source_image = gr.Image(label="Аватарка", source="upload", type="filepath", elem_id="img2img_image")
with gr.Tabs(elem_id="sadtalker_driven_audio"):
with gr.TabItem('Генерация или загрузка аудио'):
with gr.Row():
# Choices are the keys of the module-level `models` dict.
model_choice = gr.Dropdown(choices=list(models.keys()), value="TeraTTS/natasha-g2p-vits", label="Выберите модель TTS для синтеза речи:")
# This Row is created with visible=False.
with gr.Row(visible=False):
length_scale = gr.Slider(minimum=0.1, maximum=2.0, label="Length scale (увеличить длину звучания) По умолчанию: 1.2", value=1.2)
with gr.Row():
input_text = gr.Textbox(label="Введите текст для синтеза речи:")
with gr.Row():
# driven_audio is both an upload target and the output of the TTS button below.
driven_audio = gr.Audio(label="Аудиофайл", source="upload", type="filepath")
driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
# This Column is created with visible=False.
with gr.Column(visible=False):
use_idle_mode = gr.Checkbox(label="Use Idle Animation", visible=False)
length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
# Checkbox changes swap visibility of driven_audio / driven_audio_no via toggle_audio_file.
use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
with gr.Row():
play_button = gr.Button('Синтез речи', variant='primary')
# Clicking runs text_to_speech; the returned path is written into driven_audio.
play_button.click(
fn=text_to_speech, inputs=[model_choice, length_scale, input_text], outputs=[driven_audio]
)
with gr.Row():
ref_video = gr.Video(label="Видео для генерации", source="upload", type="filepath", elem_id="vidref")
with gr.Column():
use_ref_video = gr.Checkbox(label="Использовать видео для генерации")
ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Видео для эталона',info="Как использовать эталонное видео?((video driving mode))")
# ref_video changes set the use_ref_video checkbox via ref_video_fn (True when a video is present).
ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video]) # todo
# --- Settings panel: generation options, submit button, result video ---
with gr.Column(variant='panel'):
with gr.Tabs(elem_id="sadtalker_checkbox"):
with gr.TabItem('Настройки генерации видео'):
with gr.Column(variant='panel'):
with gr.Row():
pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Стиль", value=0) #
exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="Степень выразительности", value=1) #
blink_every = gr.Checkbox(label="Моргание", value=True)
with gr.Row():
size_of_image = gr.Radio([256, 512], value=256, label='Разрешение модели лица', info="256/512?") #
preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='Предварительная обработка', info="Как обрабатывать входное изображение?")
with gr.Row():
is_still_mode = gr.Checkbox(label="Режим неподвижности (меньше движений головы, работает с full режимом)")
facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='Рендер лица', info="Какой использовать?")
with gr.Row():
batch_size = gr.Slider(label="Размер пакета при генерации", step=1, maximum=10, value=1)
enhancer = gr.Checkbox(label="GFPGAN как пост-обработчик лица")
submit = gr.Button('Генерировать', elem_id="sadtalker_generate", variant='primary')
with gr.Tabs(elem_id="sadtalker_genearted"):
gen_video = gr.Video(label="Сгенерированное изображение", format="mp4")
# Clicking submit calls sad_talker.test with the 16 component values below and
# puts the result into gen_video.
# NOTE(review): presumably the list order must match the positional parameters
# of SadTalker.test — verify against src/gradio_demo.py before reordering.
submit.click(
fn=sad_talker.test,
inputs=[source_image,
driven_audio,
preprocess_type,
is_still_mode,
enhancer,
batch_size,
size_of_image,
pose_style,
facerender,
exp_weight,
use_ref_video,
ref_video,
ref_info,
use_idle_mode,
length_of_audio,
blink_every
],
outputs=[gen_video]
)
# Hand the assembled (not yet launched) Blocks object back to the caller.
return sadtalker_interface
if __name__ == "__main__":
    # Script entry point: build the interface, enable queuing with
    # max_size=10, then start the server with debug=True.
    demo = sadtalker_demo()
    demo.queue(max_size=10)
    demo.launch(debug=True)
|