Spaces:
Runtime error
Runtime error
Updated packages and requirements, added device print statement, modified README, added webui.bat and test.sh scripts, and downloaded new models and enhancer weights.
Browse files- .gitattributes +18 -1
- README.md +5 -4
- app.py +102 -102
- packages.txt +2 -1
- requirements.txt +2 -1
- scripts/download_models.sh +32 -0
- scripts/extension.py +189 -0
- scripts/test.sh +21 -0
- src/gradio_demo.py +1 -0
- webui.bat +17 -0
.gitattributes
CHANGED
@@ -25,7 +25,6 @@
|
|
25 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
@@ -33,3 +32,21 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
28 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
checkpoints/BFM_Fitting/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
|
36 |
+
checkpoints/BFM_Fitting/BFM09_model_info.mat filter=lfs diff=lfs merge=lfs -text
|
37 |
+
checkpoints/facevid2vid_00189-model.pth.tar filter=lfs diff=lfs merge=lfs -text
|
38 |
+
checkpoints/mapping_00229-model.pth.tar filter=lfs diff=lfs merge=lfs -text
|
39 |
+
checkpoints/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
|
40 |
+
examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
|
41 |
+
examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
|
42 |
+
examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
|
43 |
+
examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
|
44 |
+
examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
|
45 |
+
examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
|
46 |
+
examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
|
47 |
+
examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
|
48 |
+
examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
|
49 |
+
examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
|
50 |
+
examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
|
52 |
+
examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
---
|
2 |
title: AvatarTest
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: AvatarTest
|
3 |
+
emoji: 😭
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.23.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: mit
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import os, sys
|
|
|
2 |
import tempfile
|
|
|
3 |
import gradio as gr
|
4 |
from src.gradio_demo import SadTalker
|
5 |
# from src.utils.text2speech import TTSTalker
|
@@ -28,8 +30,88 @@ def ref_video_fn(path_of_ref_video):
|
|
28 |
return gr.update(value=False)
|
29 |
|
30 |
def download_model():
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
def sadtalker_demo():
|
35 |
|
@@ -38,41 +120,37 @@ def sadtalker_demo():
|
|
38 |
sad_talker = SadTalker(lazy_load=True)
|
39 |
# tts_talker = TTSTalker()
|
40 |
|
41 |
-
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
|
42 |
-
gr.
|
43 |
-
<a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> \
|
44 |
-
<a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> \
|
45 |
-
<a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
|
46 |
-
|
47 |
-
|
48 |
-
gr.Markdown("""
|
49 |
-
<b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> \
|
50 |
-
<br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> \
|
51 |
-
""")
|
52 |
-
|
53 |
-
with gr.Row().style(equal_height=False):
|
54 |
with gr.Column(variant='panel'):
|
55 |
with gr.Tabs(elem_id="sadtalker_source_image"):
|
56 |
with gr.TabItem('Source image'):
|
57 |
with gr.Row():
|
58 |
-
source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image")
|
59 |
|
60 |
|
61 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
62 |
with gr.TabItem('Driving Methods'):
|
63 |
-
gr.Markdown("Possible driving combinations: <br> 1. Audio only 2. Audio/IDLE Mode + Ref Video(pose, blink, pose+blink) 3. IDLE Mode only 4. Ref Video only (all) ")
|
64 |
-
|
65 |
with gr.Row():
|
66 |
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
67 |
driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
|
68 |
|
69 |
-
with gr.Column():
|
70 |
-
use_idle_mode = gr.Checkbox(label="Use Idle Animation")
|
71 |
length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
|
72 |
use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
|
73 |
-
|
74 |
with gr.Row():
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
with gr.Column():
|
78 |
use_ref_video = gr.Checkbox(label="Use Reference Video")
|
@@ -84,7 +162,6 @@ def sadtalker_demo():
|
|
84 |
with gr.Column(variant='panel'):
|
85 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
86 |
with gr.TabItem('Settings'):
|
87 |
-
gr.Markdown("need help? please visit our [[best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md)] for more detials")
|
88 |
with gr.Column(variant='panel'):
|
89 |
# width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
|
90 |
# height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
|
@@ -108,7 +185,7 @@ def sadtalker_demo():
|
|
108 |
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
|
109 |
|
110 |
with gr.Tabs(elem_id="sadtalker_genearted"):
|
111 |
-
gen_video = gr.Video(label="Generated video", format="mp4")
|
112 |
|
113 |
|
114 |
|
@@ -132,84 +209,7 @@ def sadtalker_demo():
|
|
132 |
blink_every
|
133 |
],
|
134 |
outputs=[gen_video]
|
135 |
-
)
|
136 |
-
|
137 |
-
with gr.Row():
|
138 |
-
examples = [
|
139 |
-
[
|
140 |
-
'examples/source_image/full_body_1.png',
|
141 |
-
'examples/driven_audio/bus_chinese.wav',
|
142 |
-
'crop',
|
143 |
-
True,
|
144 |
-
False
|
145 |
-
],
|
146 |
-
[
|
147 |
-
'examples/source_image/full_body_2.png',
|
148 |
-
'examples/driven_audio/japanese.wav',
|
149 |
-
'crop',
|
150 |
-
False,
|
151 |
-
False
|
152 |
-
],
|
153 |
-
[
|
154 |
-
'examples/source_image/full3.png',
|
155 |
-
'examples/driven_audio/deyu.wav',
|
156 |
-
'crop',
|
157 |
-
False,
|
158 |
-
True
|
159 |
-
],
|
160 |
-
[
|
161 |
-
'examples/source_image/full4.jpeg',
|
162 |
-
'examples/driven_audio/eluosi.wav',
|
163 |
-
'full',
|
164 |
-
False,
|
165 |
-
True
|
166 |
-
],
|
167 |
-
[
|
168 |
-
'examples/source_image/full4.jpeg',
|
169 |
-
'examples/driven_audio/imagine.wav',
|
170 |
-
'full',
|
171 |
-
True,
|
172 |
-
True
|
173 |
-
],
|
174 |
-
[
|
175 |
-
'examples/source_image/full_body_1.png',
|
176 |
-
'examples/driven_audio/bus_chinese.wav',
|
177 |
-
'full',
|
178 |
-
True,
|
179 |
-
False
|
180 |
-
],
|
181 |
-
[
|
182 |
-
'examples/source_image/art_13.png',
|
183 |
-
'examples/driven_audio/fayu.wav',
|
184 |
-
'resize',
|
185 |
-
True,
|
186 |
-
False
|
187 |
-
],
|
188 |
-
[
|
189 |
-
'examples/source_image/art_5.png',
|
190 |
-
'examples/driven_audio/chinese_news.wav',
|
191 |
-
'resize',
|
192 |
-
False,
|
193 |
-
False
|
194 |
-
],
|
195 |
-
[
|
196 |
-
'examples/source_image/art_5.png',
|
197 |
-
'examples/driven_audio/RD_Radio31_000.wav',
|
198 |
-
'resize',
|
199 |
-
True,
|
200 |
-
True
|
201 |
-
],
|
202 |
-
]
|
203 |
-
gr.Examples(examples=examples,
|
204 |
-
inputs=[
|
205 |
-
source_image,
|
206 |
-
driven_audio,
|
207 |
-
preprocess_type,
|
208 |
-
is_still_mode,
|
209 |
-
enhancer],
|
210 |
-
outputs=[gen_video],
|
211 |
-
fn=sad_talker.test,
|
212 |
-
cache_examples=os.getenv('SYSTEM') == 'spaces') #
|
213 |
|
214 |
return sadtalker_interface
|
215 |
|
|
|
1 |
import os, sys
|
2 |
+
import uuid
|
3 |
import tempfile
|
4 |
+
import pyttsx3
|
5 |
import gradio as gr
|
6 |
from src.gradio_demo import SadTalker
|
7 |
# from src.utils.text2speech import TTSTalker
|
|
|
30 |
return gr.update(value=False)
|
31 |
|
32 |
def download_model():
    """Call ``snapshot_download`` for the ``vinthony/SadTalker-V002rc`` repository.

    The return value of ``snapshot_download`` is discarded.
    """
    snapshot_download('vinthony/SadTalker-V002rc')
|
35 |
+
|
36 |
+
# language : en_US, de_DE, ...
|
37 |
+
# gender : VoiceGenderFemale, VoiceGenderMale
|
38 |
+
def _decode_language(lang):
    """Return a voice language tag as text.

    Handles both ``bytes`` and ``str`` entries in ``voice.languages``; the
    previous ``str(lang, 'utf-8')`` raised ``TypeError`` for ``str`` entries.
    """
    if isinstance(lang, bytes):
        return lang.decode('utf-8', errors='ignore')
    return str(lang)


def _gender_in(text, gender):
    """Return True when ``gender`` occurs in ``text`` (case-insensitive).

    ``'male'`` is a substring of ``'female'``, so unless the caller is
    actually asking for a female voice, every ``'female'`` occurrence is
    removed from ``text`` before the substring test.
    """
    if not text:
        return False
    haystack = text.lower()
    if 'female' not in gender:
        haystack = haystack.replace('female', '')
    return gender in haystack


def change_voice(engine, language='ru_ru', gender='male'):
    """Select a voice on ``engine`` that matches ``language`` and ``gender``.

    Matching is a case-insensitive substring test. A voice is a language
    candidate when ``language`` occurs in one of its ``languages`` entries,
    its ``id`` or its ``name`` (checked in that order). Among the candidates,
    the first whose ``gender``, ``id`` or ``name`` contains ``gender`` is
    selected; if none does, the first language candidate is used.

    Args:
        engine: object exposing ``getProperty('voices')`` and
            ``setProperty('voice', voice_id)``.
        language: language fragment to look for, e.g. ``'ru'`` or ``'en_US'``.
            ``None``/empty matches every voice.
        gender: gender fragment to look for, e.g. ``'male'`` or ``'Female'``.
            ``None``/empty matches the first candidate that has any
            non-empty ``gender``, ``id`` or ``name``.

    Returns:
        True if a voice was set on the engine, False when no voice matched
        the language at all.
    """
    language = language.lower() if language else ''
    gender = gender.lower() if gender else ''

    selected_voices = []
    for voice in engine.getProperty('voices'):
        # First language entry of this voice that contains the fragment.
        lang_hit = next(
            (text for text in map(_decode_language, voice.languages or [])
             if text and language in text.lower()),
            None,
        )
        if lang_hit is not None:
            selected_voices.append(voice)
            print("voice appended by lang", voice, lang_hit)
        elif voice.id and language in voice.id.lower():
            selected_voices.append(voice)
            print("voice appended by id", voice.id)
        elif voice.name and language in voice.name.lower():
            selected_voices.append(voice)
            print("voice appended by name", voice.name)

    for voice in selected_voices:
        for label, text in (("gender", voice.gender),
                            ("id", voice.id),
                            ("name", voice.name)):
            if _gender_in(text, gender):
                engine.setProperty('voice', voice.id)
                print("voice selected by " + label, text)
                return True

    if selected_voices:
        # No gender match: fall back to the first voice for the language.
        engine.setProperty('voice', selected_voices[0].id)
        print("voice selected by default", selected_voices[0].id)
        return True

    return False
|
93 |
+
|
94 |
+
def play_text_to_speech(text_input, voice_option):
    """Speak ``text_input`` with pyttsx3 and save it as a .wav file.

    A fresh engine is created, ``change_voice`` is asked for a ``'ru'`` voice
    matching ``voice_option`` (its result is not checked), and the text is
    queued both for playback (``say``) and for writing to a uniquely named
    file under ``./results/voice_input``.

    Args:
        text_input: text to synthesize.
        voice_option: gender hint forwarded to ``change_voice``.

    Returns:
        Path of the .wav file the engine was asked to write.
    """
    tts = pyttsx3.init()
    change_voice(tts, 'ru', voice_option)

    print("text_input", text_input)
    print("voice_option", voice_option)

    unique_tag = str(uuid.uuid4())
    out_dir = './results/voice_input'
    os.makedirs(out_dir, exist_ok=True)
    wav_path = os.path.join(out_dir, os.path.basename(unique_tag + '.wav'))

    # Create (or truncate) the target file before the engine writes to it.
    with open(wav_path, "wb"):
        pass

    tts.say(text_input)
    tts.save_to_file(text_input, wav_path)
    tts.runAndWait()

    print("file saved to", wav_path)
    return wav_path
|
115 |
|
116 |
def sadtalker_demo():
|
117 |
|
|
|
120 |
sad_talker = SadTalker(lazy_load=True)
|
121 |
# tts_talker = TTSTalker()
|
122 |
|
123 |
+
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
|
124 |
+
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
with gr.Column(variant='panel'):
|
126 |
with gr.Tabs(elem_id="sadtalker_source_image"):
|
127 |
with gr.TabItem('Source image'):
|
128 |
with gr.Row():
|
129 |
+
source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image")
|
130 |
|
131 |
|
132 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
133 |
with gr.TabItem('Driving Methods'):
|
|
|
|
|
134 |
with gr.Row():
|
135 |
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
136 |
driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
|
137 |
|
138 |
+
with gr.Column(visible=False):
|
139 |
+
use_idle_mode = gr.Checkbox(label="Use Idle Animation", visible=False)
|
140 |
length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
|
141 |
use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
|
|
|
142 |
with gr.Row():
|
143 |
+
text_input = gr.Textbox(label="Enter text", multiline=True)
|
144 |
+
voice_option = gr.Radio(['Male', 'Female'], label='Voice Option', value='Female')
|
145 |
+
with gr.Row():
|
146 |
+
play_button = gr.Button('Text To Speech', variant='primary')
|
147 |
+
play_button.click(
|
148 |
+
fn=play_text_to_speech,
|
149 |
+
inputs=[text_input, voice_option],
|
150 |
+
outputs=[driven_audio]
|
151 |
+
)
|
152 |
+
with gr.Row():
|
153 |
+
ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref")
|
154 |
|
155 |
with gr.Column():
|
156 |
use_ref_video = gr.Checkbox(label="Use Reference Video")
|
|
|
162 |
with gr.Column(variant='panel'):
|
163 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
164 |
with gr.TabItem('Settings'):
|
|
|
165 |
with gr.Column(variant='panel'):
|
166 |
# width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
|
167 |
# height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
|
|
|
185 |
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
|
186 |
|
187 |
with gr.Tabs(elem_id="sadtalker_genearted"):
|
188 |
+
gen_video = gr.Video(label="Generated video", format="mp4")
|
189 |
|
190 |
|
191 |
|
|
|
209 |
blink_every
|
210 |
],
|
211 |
outputs=[gen_video]
|
212 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
214 |
return sadtalker_interface
|
215 |
|
packages.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
ffmpeg
|
2 |
-
libsndfile1
|
|
|
|
1 |
ffmpeg
|
2 |
+
libsndfile1
|
3 |
+
libespeak1
|
requirements.txt
CHANGED
@@ -21,4 +21,5 @@ facexlib==0.3.0
|
|
21 |
dlib-bin
|
22 |
gfpgan
|
23 |
av
|
24 |
-
safetensors
|
|
|
|
21 |
dlib-bin
|
22 |
gfpgan
|
23 |
av
|
24 |
+
safetensors
|
25 |
+
pyttsx3==2.90
|
scripts/download_models.sh
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Download the SadTalker checkpoints and the face-enhancer weights.
# -p keeps the script re-runnable when ./checkpoints already exists.
mkdir -p ./checkpoints

# legacy download links
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/auido2exp_00300-model.pth -O ./checkpoints/auido2exp_00300-model.pth
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/auido2pose_00140-model.pth -O ./checkpoints/auido2pose_00140-model.pth
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/epoch_20.pth -O ./checkpoints/epoch_20.pth
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/facevid2vid_00189-model.pth.tar -O ./checkpoints/facevid2vid_00189-model.pth.tar
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/shape_predictor_68_face_landmarks.dat -O ./checkpoints/shape_predictor_68_face_landmarks.dat
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/wav2lip.pth -O ./checkpoints/wav2lip.pth
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/mapping_00109-model.pth.tar -O ./checkpoints/mapping_00109-model.pth.tar
# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/hub.zip -O ./checkpoints/hub.zip
# unzip -n ./checkpoints/hub.zip -d ./checkpoints/


#### download from the new links.
wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/mapping_00109-model.pth.tar -O ./checkpoints/mapping_00109-model.pth.tar
wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar
wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/SadTalker_V0.0.2_256.safetensors -O ./checkpoints/SadTalker_V0.0.2_256.safetensors
wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/SadTalker_V0.0.2_512.safetensors -O ./checkpoints/SadTalker_V0.0.2_512.safetensors


# wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/BFM_Fitting.zip -O ./checkpoints/BFM_Fitting.zip
# unzip -n ./checkpoints/BFM_Fitting.zip -d ./checkpoints/

### enhancer
mkdir -p ./gfpgan/weights
wget -nc https://github.com/xinntao/facexlib/releases/download/v0.1.0/alignment_WFLW_4HG.pth -O ./gfpgan/weights/alignment_WFLW_4HG.pth
wget -nc https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth -O ./gfpgan/weights/detection_Resnet50_Final.pth
wget -nc https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -O ./gfpgan/weights/GFPGANv1.4.pth
wget -nc https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth -O ./gfpgan/weights/parsing_parsenet.pth
|
32 |
+
|
scripts/extension.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, sys
|
2 |
+
from pathlib import Path
|
3 |
+
import tempfile
|
4 |
+
import gradio as gr
|
5 |
+
from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call
|
6 |
+
from modules.shared import opts, OptionInfo
|
7 |
+
from modules import shared, paths, script_callbacks
|
8 |
+
import launch
|
9 |
+
import glob
|
10 |
+
from huggingface_hub import snapshot_download
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
def _dir_contains_all(current_dir, required_names):
    """Return True when ``current_dir`` is a directory listing every required name."""
    if not os.path.isdir(current_dir):
        return False
    return set(required_names).issubset(os.listdir(current_dir))


def check_all_files_safetensor(current_dir):
    """Check ``current_dir`` for the complete safetensors checkpoint set.

    Args:
        current_dir: directory to inspect (``str`` or path-like).

    Returns:
        True only if ``current_dir`` is an existing directory whose entries
        include both SadTalker safetensors files and both mapping
        checkpoints; False otherwise.
    """
    return _dir_contains_all(current_dir, (
        "SadTalker_V0.0.2_256.safetensors",
        "SadTalker_V0.0.2_512.safetensors",
        "mapping_00109-model.pth.tar",
        "mapping_00229-model.pth.tar",
    ))


def check_all_files(current_dir):
    """Check ``current_dir`` for the complete set of ``.pth``-style checkpoints.

    Args:
        current_dir: directory to inspect (``str`` or path-like).

    Returns:
        True only if ``current_dir`` is an existing directory whose entries
        include all eight files named below; False otherwise.
    """
    return _dir_contains_all(current_dir, (
        # The "auido2..." spelling is the actual checkpoint file name.
        "auido2exp_00300-model.pth",
        "auido2pose_00140-model.pth",
        "epoch_20.pth",
        "facevid2vid_00189-model.pth.tar",
        "mapping_00109-model.pth.tar",
        "mapping_00229-model.pth.tar",
        "wav2lip.pth",
        "shape_predictor_68_face_landmarks.dat",
    ))
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
def download_model(local_dir='./checkpoints'):
    """Call ``snapshot_download`` for ``vinthony/SadTalker`` into ``local_dir``.

    Args:
        local_dir: destination directory passed through to
            ``snapshot_download`` (with ``local_dir_use_symlinks=False``).
    """
    snapshot_download(
        repo_id='vinthony/SadTalker',
        local_dir=local_dir,
        local_dir_use_symlinks=False,
    )
|
61 |
+
|
62 |
+
def get_source_image(image):
    """Return ``image`` unchanged."""
    return image
|
64 |
+
|
65 |
+
def _latest_output_png(subdir):
    """Return the newest ``<script_path>/outputs/<subdir>/*/*.png``, or None.

    "Newest" is by modification time. ``glob`` already returns paths that
    include the search directory, so they are used as-is; joining them onto
    the directory again doubles the prefix when ``script_path`` is relative.
    """
    pattern = str(Path(paths.script_path) / "outputs" / subdir / "*" / "*.png")
    candidates = glob.glob(pattern)
    if not candidates:
        return None
    return max(candidates, key=os.path.getmtime)


def get_img_from_txt2img(x):
    """Return the most recently modified txt2img output image, twice.

    Args:
        x: unused; presumably present so the function can be wired as a UI
            callback (verify against the caller).

    Returns:
        A 2-tuple holding the same path in both positions, or
        ``(None, None)`` when no PNG exists under ``outputs/txt2img-images``
        (previously this case raised ``IndexError``).
    """
    latest = _latest_output_png("txt2img-images")
    return latest, latest


def get_img_from_img2img(x):
    """Return the most recently modified img2img output image, twice.

    Args:
        x: unused; presumably present so the function can be wired as a UI
            callback (verify against the caller).

    Returns:
        A 2-tuple holding the same path in both positions, or
        ``(None, None)`` when no PNG exists under ``outputs/img2img-images``
        (previously this case raised ``IndexError``).
    """
    latest = _latest_output_png("img2img-images")
    return latest, latest
|
80 |
+
|
81 |
+
def get_default_checkpoint_path():
    """Locate a directory that already holds a complete SadTalker checkpoint set.

    Two locations under ``paths.script_path`` are probed:
    ``models/SadTalker`` and ``extensions/SadTalker/checkpoints``. The
    safetensors set is checked in both locations first, then the older
    ``.pth`` set in both.

    Returns:
        The first matching ``Path``, or None when neither location is
        complete for either set.
    """
    root = Path(paths.script_path)
    models_dir = root / "models" / "SadTalker"
    extension_dir = root / "extensions" / "SadTalker" / "checkpoints"

    # Order matters: safetensors wins over .pth, models/ wins over extensions/.
    probes = (
        (check_all_files_safetensor, models_dir),
        (check_all_files_safetensor, extension_dir),
        (check_all_files, models_dir),
        (check_all_files, extension_dir),
    )
    for is_complete, candidate in probes:
        if is_complete(candidate):
            return candidate

    return None
|
103 |
+
|
104 |
+
|
105 |
+
|
106 |
+
def install():
    """Install missing Python requirements and resolve the checkpoint location.

    Each entry of the requirement table maps an importable module name
    (checked with ``launch.is_installed``) to the pip requirement installed
    through ``launch.run_pip`` when that module is missing.

    Afterwards ``SADTALKER_CHECKPOINTS`` is resolved: an existing environment
    value is kept and reported; otherwise it is set to the result of
    ``get_default_checkpoint_path()``; if that finds nothing, a hint asking
    the user to set the variable manually is printed. Nothing is downloaded
    here.
    """
    requirements = {
        "face_alignment": "face-alignment==1.3.5",
        "imageio": "imageio==2.19.3",
        "imageio_ffmpeg": "imageio-ffmpeg==0.4.7",
        "librosa": "librosa==0.8.0",
        "pydub": "pydub==0.25.1",
        "scipy": "scipy==1.8.1",
        "tqdm": "tqdm",
        "yacs": "yacs==0.1.8",
        "yaml": "pyyaml",
        "av": "av",
        "gfpgan": "gfpgan",
    }
    # dlib is not necessary currently, so it is not in the table.

    for module_name, requirement in requirements.items():
        installed = launch.is_installed(module_name)
        if not installed:
            print(module_name, installed)
            launch.run_pip("install " + requirement, "requirements for SadTalker")

    env_checkpoints = os.getenv('SADTALKER_CHECKPOINTS')
    if env_checkpoints:
        print('load Sadtalker Checkpoints from ' + env_checkpoints)
        return

    # Probe the default locations once and reuse the result.
    default_path = get_default_checkpoint_path()
    if default_path is not None:
        os.environ['SADTALKER_CHECKPOINTS'] = str(default_path)
        return

    # The original literal opened with four quotes, which printed a stray '"'.
    print(
        "\n"
        "SadTalker will not support download all the files from hugging face, which will take a long time.\n"
        "\n"
        "please manually set the SADTALKER_CHECKPOINTS in `webui_user.bat`(windows) or `webui_user.sh`(linux)\n"
    )
|
160 |
+
|
161 |
+
|
162 |
+
def on_ui_tabs():
    """Build the SadTalker tab.

    Runs ``install()``, puts the extension directory on ``sys.path`` so
    ``app_sadtalker`` can be imported, makes sure the configured result
    directory exists, and builds the interface with ``sadtalker_demo``.

    Returns:
        A one-element list holding the tuple
        ``(interface, "SadTalker", "extension")``.
    """
    install()

    extension_root = paths.script_path+'/extensions/SadTalker'
    sys.path.append(extension_root)

    repo_dir = paths.script_path+'/extensions/SadTalker/'

    os.makedirs(opts.sadtalker_result_dir, exist_ok=True)

    # Imported late: only resolvable once the extension dir is on sys.path.
    from app_sadtalker import sadtalker_demo

    # An unset or empty environment value falls back to the bundled checkpoints.
    checkpoint_path = os.getenv('SADTALKER_CHECKPOINTS') or repo_dir+'checkpoints/'

    audio_to_video = sadtalker_demo(
        checkpoint_path=checkpoint_path,
        config_path=repo_dir+'src/config',
        warpfn=wrap_queued_call,
    )

    return [(audio_to_video, "SadTalker", "extension")]
|
182 |
+
|
183 |
+
def on_ui_settings():
    """Register the ``sadtalker_result_dir`` option.

    The default value is ``<script_path>/outputs/SadTalker`` and the option
    is filed under the ``('extension', "SadTalker")`` section.
    """
    default_result_dir = str(Path(paths.script_path) / "outputs" / "SadTalker/")
    opts.add_option(
        "sadtalker_result_dir",
        OptionInfo(
            default_result_dir,
            "Path to save results of sadtalker",
            section=('extension', "SadTalker"),
        ),
    )


# Register both callbacks: settings first, then the tab.
script_callbacks.on_ui_settings(on_ui_settings)
script_callbacks.on_ui_tabs(on_ui_tabs)
|
scripts/test.sh
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ### Smoke-test commands to run before committing.
# python inference.py --preprocess crop --size 256
# python inference.py --preprocess crop --size 512

# python inference.py --preprocess extcrop --size 256
# python inference.py --preprocess extcrop --size 512

# python inference.py --preprocess resize --size 256
# python inference.py --preprocess resize --size 512

# python inference.py --preprocess full --size 256
# python inference.py --preprocess full --size 512

# python inference.py --preprocess extfull --size 256
# python inference.py --preprocess extfull --size 512

# Active runs: full preprocessing with the gfpgan enhancer at both sizes,
# first without and then with --still ($still is deliberately unquoted so
# the empty value expands to no argument).
for still in "" "--still"; do
    for size in 256 512; do
        python inference.py --preprocess full --size $size --enhancer gfpgan $still
    done
done
|
src/gradio_demo.py
CHANGED
@@ -28,6 +28,7 @@ class SadTalker():
|
|
28 |
else:
|
29 |
device = "cpu"
|
30 |
|
|
|
31 |
self.device = device
|
32 |
|
33 |
os.environ['TORCH_HOME']= checkpoint_path
|
|
|
28 |
else:
|
29 |
device = "cpu"
|
30 |
|
31 |
+
print(f'Using device: {device}')
|
32 |
self.device = device
|
33 |
|
34 |
os.environ['TORCH_HOME']= checkpoint_path
|
webui.bat
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@echo off

REM Create the virtual environment on first run, then activate it.
IF NOT EXIST venv (
    python -m venv venv
) ELSE (
    echo venv folder already exists, skipping creation...
)
call .\venv\Scripts\activate.bat

set PYTHON="venv\Scripts\Python.exe"
echo venv %PYTHON%

REM The entry point in this repository is app.py (lower case).
%PYTHON% app.py

REM Only report a failure when the launcher exited with a non-zero code.
IF ERRORLEVEL 1 (
    echo.
    echo Launch unsuccessful. Exiting.
)
pause
|