lazhrach commited on
Commit
d05c508
1 Parent(s): c275b63

Updated packages and requirements, added device print statement, modified README, added webui.bat and test.sh scripts, and downloaded new models and enhancer weights.

Browse files
Files changed (10) hide show
  1. .gitattributes +18 -1
  2. README.md +5 -4
  3. app.py +102 -102
  4. packages.txt +2 -1
  5. requirements.txt +2 -1
  6. scripts/download_models.sh +32 -0
  7. scripts/extension.py +189 -0
  8. scripts/test.sh +21 -0
  9. src/gradio_demo.py +1 -0
  10. webui.bat +17 -0
.gitattributes CHANGED
@@ -25,7 +25,6 @@
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,21 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
28
  *.tflite filter=lfs diff=lfs merge=lfs -text
29
  *.tgz filter=lfs diff=lfs merge=lfs -text
30
  *.wasm filter=lfs diff=lfs merge=lfs -text
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ checkpoints/BFM_Fitting/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
36
+ checkpoints/BFM_Fitting/BFM09_model_info.mat filter=lfs diff=lfs merge=lfs -text
37
+ checkpoints/facevid2vid_00189-model.pth.tar filter=lfs diff=lfs merge=lfs -text
38
+ checkpoints/mapping_00229-model.pth.tar filter=lfs diff=lfs merge=lfs -text
39
+ checkpoints/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
40
+ examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
41
+ examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
42
+ examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
43
+ examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
44
+ examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
45
+ examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
46
+ examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
47
+ examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
48
+ examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
49
+ examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
50
+ examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
51
+ examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
52
+ examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: AvatarTest
3
- emoji: 🔥
4
- colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: AvatarTest
3
+ emoji: 😭
4
+ colorFrom: purple
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.23.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import os, sys
 
2
  import tempfile
 
3
  import gradio as gr
4
  from src.gradio_demo import SadTalker
5
  # from src.utils.text2speech import TTSTalker
@@ -28,8 +30,88 @@ def ref_video_fn(path_of_ref_video):
28
  return gr.update(value=False)
29
 
30
  def download_model():
31
- REPO_ID = 'vinthony/SadTalker-V002rc'
32
- snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def sadtalker_demo():
35
 
@@ -38,41 +120,37 @@ def sadtalker_demo():
38
  sad_talker = SadTalker(lazy_load=True)
39
  # tts_talker = TTSTalker()
40
 
41
- with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
42
- gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
43
- <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
44
- <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
45
- <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
46
-
47
-
48
- gr.Markdown("""
49
- <b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> \
50
- <br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> \
51
- """)
52
-
53
- with gr.Row().style(equal_height=False):
54
  with gr.Column(variant='panel'):
55
  with gr.Tabs(elem_id="sadtalker_source_image"):
56
  with gr.TabItem('Source image'):
57
  with gr.Row():
58
- source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
59
 
60
 
61
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
62
  with gr.TabItem('Driving Methods'):
63
- gr.Markdown("Possible driving combinations: <br> 1. Audio only 2. Audio/IDLE Mode + Ref Video(pose, blink, pose+blink) 3. IDLE Mode only 4. Ref Video only (all) ")
64
-
65
  with gr.Row():
66
  driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
67
  driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
68
 
69
- with gr.Column():
70
- use_idle_mode = gr.Checkbox(label="Use Idle Animation")
71
  length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
72
  use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
73
-
74
  with gr.Row():
75
- ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref").style(width=512)
 
 
 
 
 
 
 
 
 
 
76
 
77
  with gr.Column():
78
  use_ref_video = gr.Checkbox(label="Use Reference Video")
@@ -84,7 +162,6 @@ def sadtalker_demo():
84
  with gr.Column(variant='panel'):
85
  with gr.Tabs(elem_id="sadtalker_checkbox"):
86
  with gr.TabItem('Settings'):
87
- gr.Markdown("need help? please visit our [[best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md)] for more detials")
88
  with gr.Column(variant='panel'):
89
  # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
90
  # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
@@ -108,7 +185,7 @@ def sadtalker_demo():
108
  submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
109
 
110
  with gr.Tabs(elem_id="sadtalker_genearted"):
111
- gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
112
 
113
 
114
 
@@ -132,84 +209,7 @@ def sadtalker_demo():
132
  blink_every
133
  ],
134
  outputs=[gen_video]
135
- )
136
-
137
- with gr.Row():
138
- examples = [
139
- [
140
- 'examples/source_image/full_body_1.png',
141
- 'examples/driven_audio/bus_chinese.wav',
142
- 'crop',
143
- True,
144
- False
145
- ],
146
- [
147
- 'examples/source_image/full_body_2.png',
148
- 'examples/driven_audio/japanese.wav',
149
- 'crop',
150
- False,
151
- False
152
- ],
153
- [
154
- 'examples/source_image/full3.png',
155
- 'examples/driven_audio/deyu.wav',
156
- 'crop',
157
- False,
158
- True
159
- ],
160
- [
161
- 'examples/source_image/full4.jpeg',
162
- 'examples/driven_audio/eluosi.wav',
163
- 'full',
164
- False,
165
- True
166
- ],
167
- [
168
- 'examples/source_image/full4.jpeg',
169
- 'examples/driven_audio/imagine.wav',
170
- 'full',
171
- True,
172
- True
173
- ],
174
- [
175
- 'examples/source_image/full_body_1.png',
176
- 'examples/driven_audio/bus_chinese.wav',
177
- 'full',
178
- True,
179
- False
180
- ],
181
- [
182
- 'examples/source_image/art_13.png',
183
- 'examples/driven_audio/fayu.wav',
184
- 'resize',
185
- True,
186
- False
187
- ],
188
- [
189
- 'examples/source_image/art_5.png',
190
- 'examples/driven_audio/chinese_news.wav',
191
- 'resize',
192
- False,
193
- False
194
- ],
195
- [
196
- 'examples/source_image/art_5.png',
197
- 'examples/driven_audio/RD_Radio31_000.wav',
198
- 'resize',
199
- True,
200
- True
201
- ],
202
- ]
203
- gr.Examples(examples=examples,
204
- inputs=[
205
- source_image,
206
- driven_audio,
207
- preprocess_type,
208
- is_still_mode,
209
- enhancer],
210
- outputs=[gen_video],
211
- fn=sad_talker.test,
212
- cache_examples=os.getenv('SYSTEM') == 'spaces') #
213
 
214
  return sadtalker_interface
215
 
 
1
  import os, sys
2
+ import uuid
3
  import tempfile
4
+ import pyttsx3
5
  import gradio as gr
6
  from src.gradio_demo import SadTalker
7
  # from src.utils.text2speech import TTSTalker
 
30
  return gr.update(value=False)
31
 
32
  def download_model():
33
+ REPO_ID = 'vinthony/SadTalker-V002rc'
34
+ snapshot_download(REPO_ID)
35
+
36
+ # language : en_US, de_DE, ...
37
+ # gender : VoiceGenderFemale, VoiceGenderMale
38
+ def change_voice(engine, language='ru_ru', gender='male'):
39
+
40
+ selected_voices = []
41
+
42
+ language = language.lower() if language else ''
43
+ gender = gender.lower() if gender else ''
44
+
45
+ for voice in engine.getProperty('voices'):
46
+ voice_appended = False
47
+
48
+ for lang in voice.languages:
49
+
50
+ lang_str = str(lang, 'utf-8')
51
+ print("lang", lang_str)
52
+
53
+ if lang_str and language in lang_str.lower():
54
+ selected_voices.append(voice)
55
+ print("voice appended by lang", voice, lang_str)
56
+ voice_appended = True
57
+ break
58
+
59
+ if voice_appended:
60
+ continue
61
+
62
+ if voice.id and language in voice.id.lower():
63
+ selected_voices.append(voice)
64
+ print("voice appended by id", voice.id)
65
+ continue
66
+
67
+ if voice.name and language in voice.name.lower():
68
+ selected_voices.append(voice)
69
+ print("voice appended by name", voice.name)
70
+ continue
71
+
72
+ for voice in selected_voices:
73
+ if voice.gender and gender in voice.gender.lower():
74
+ engine.setProperty('voice', voice.id)
75
+ print("voice selected by gender", voice.gender)
76
+ return True
77
+
78
+ if voice.id and gender in voice.id.lower():
79
+ engine.setProperty('voice', voice.id)
80
+ print("voice selected by id", voice.id)
81
+ return True
82
+ if voice.name and gender in voice.name.lower():
83
+ engine.setProperty('voice', voice.id)
84
+ print("voice selected by name", voice.name)
85
+ return True
86
+
87
+ if len(selected_voices) > 0:
88
+ engine.setProperty('voice', selected_voices[0].id)
89
+ print("voice selected by default", selected_voices[0].id)
90
+ return True
91
+
92
+ return False
93
+
94
+ def play_text_to_speech(text_input, voice_option):
95
+ engine = pyttsx3.init()
96
+
97
+ change_voice(engine, 'ru', voice_option)
98
+
99
+ print("text_input", text_input)
100
+ print("voice_option", voice_option)
101
+
102
+ time_tag = str(uuid.uuid4())
103
+ save_dir = './results/voice_input'
104
+ os.makedirs(save_dir, exist_ok=True)
105
+ file_name = os.path.join(save_dir, os.path.basename(time_tag + '.wav'))
106
+
107
+ open(file_name, "wb").close()
108
+ engine.say(text_input)
109
+ engine.save_to_file(text_input, file_name)
110
+ engine.runAndWait()
111
+
112
+ print("file saved to", file_name)
113
+
114
+ return file_name
115
 
116
  def sadtalker_demo():
117
 
 
120
  sad_talker = SadTalker(lazy_load=True)
121
  # tts_talker = TTSTalker()
122
 
123
+ with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
124
+ with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
125
  with gr.Column(variant='panel'):
126
  with gr.Tabs(elem_id="sadtalker_source_image"):
127
  with gr.TabItem('Source image'):
128
  with gr.Row():
129
+ source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image")
130
 
131
 
132
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
133
  with gr.TabItem('Driving Methods'):
 
 
134
  with gr.Row():
135
  driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
136
  driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
137
 
138
+ with gr.Column(visible=False):
139
+ use_idle_mode = gr.Checkbox(label="Use Idle Animation", visible=False)
140
  length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
141
  use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
 
142
  with gr.Row():
143
+ text_input = gr.Textbox(label="Enter text", multiline=True)
144
+ voice_option = gr.Radio(['Male', 'Female'], label='Voice Option', value='Female')
145
+ with gr.Row():
146
+ play_button = gr.Button('Text To Speech', variant='primary')
147
+ play_button.click(
148
+ fn=play_text_to_speech,
149
+ inputs=[text_input, voice_option],
150
+ outputs=[driven_audio]
151
+ )
152
+ with gr.Row():
153
+ ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref")
154
 
155
  with gr.Column():
156
  use_ref_video = gr.Checkbox(label="Use Reference Video")
 
162
  with gr.Column(variant='panel'):
163
  with gr.Tabs(elem_id="sadtalker_checkbox"):
164
  with gr.TabItem('Settings'):
 
165
  with gr.Column(variant='panel'):
166
  # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
167
  # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
 
185
  submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
186
 
187
  with gr.Tabs(elem_id="sadtalker_genearted"):
188
+ gen_video = gr.Video(label="Generated video", format="mp4")
189
 
190
 
191
 
 
209
  blink_every
210
  ],
211
  outputs=[gen_video]
212
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  return sadtalker_interface
215
 
packages.txt CHANGED
@@ -1,2 +1,3 @@
1
  ffmpeg
2
- libsndfile1
 
 
1
  ffmpeg
2
+ libsndfile1
3
+ libespeak1
requirements.txt CHANGED
@@ -21,4 +21,5 @@ facexlib==0.3.0
21
  dlib-bin
22
  gfpgan
23
  av
24
- safetensors
 
 
21
  dlib-bin
22
  gfpgan
23
  av
24
+ safetensors
25
+ pyttsx3==2.90
scripts/download_models.sh ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mkdir ./checkpoints
2
+
3
+ # legacy download link
4
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/auido2exp_00300-model.pth -O ./checkpoints/auido2exp_00300-model.pth
5
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/auido2pose_00140-model.pth -O ./checkpoints/auido2pose_00140-model.pth
6
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/epoch_20.pth -O ./checkpoints/epoch_20.pth
7
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/facevid2vid_00189-model.pth.tar -O ./checkpoints/facevid2vid_00189-model.pth.tar
8
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/shape_predictor_68_face_landmarks.dat -O ./checkpoints/shape_predictor_68_face_landmarks.dat
9
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/wav2lip.pth -O ./checkpoints/wav2lip.pth
10
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar
11
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/mapping_00109-model.pth.tar -O ./checkpoints/mapping_00109-model.pth.tar
12
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/hub.zip -O ./checkpoints/hub.zip
13
+ # unzip -n ./checkpoints/hub.zip -d ./checkpoints/
14
+
15
+
16
+ #### download the new links.
17
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/mapping_00109-model.pth.tar -O ./checkpoints/mapping_00109-model.pth.tar
18
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar
19
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/SadTalker_V0.0.2_256.safetensors -O ./checkpoints/SadTalker_V0.0.2_256.safetensors
20
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/SadTalker_V0.0.2_512.safetensors -O ./checkpoints/SadTalker_V0.0.2_512.safetensors
21
+
22
+
23
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/BFM_Fitting.zip -O ./checkpoints/BFM_Fitting.zip
24
+ # unzip -n ./checkpoints/BFM_Fitting.zip -d ./checkpoints/
25
+
26
+ ### enhancer
27
+ mkdir -p ./gfpgan/weights
28
+ wget -nc https://github.com/xinntao/facexlib/releases/download/v0.1.0/alignment_WFLW_4HG.pth -O ./gfpgan/weights/alignment_WFLW_4HG.pth
29
+ wget -nc https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth -O ./gfpgan/weights/detection_Resnet50_Final.pth
30
+ wget -nc https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -O ./gfpgan/weights/GFPGANv1.4.pth
31
+ wget -nc https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth -O ./gfpgan/weights/parsing_parsenet.pth
32
+
scripts/extension.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ from pathlib import Path
3
+ import tempfile
4
+ import gradio as gr
5
+ from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call
6
+ from modules.shared import opts, OptionInfo
7
+ from modules import shared, paths, script_callbacks
8
+ import launch
9
+ import glob
10
+ from huggingface_hub import snapshot_download
11
+
12
+
13
+
14
+ def check_all_files_safetensor(current_dir):
15
+ kv = {
16
+ "SadTalker_V0.0.2_256.safetensors": "sadtalker-256",
17
+ "SadTalker_V0.0.2_512.safetensors": "sadtalker-512",
18
+ "mapping_00109-model.pth.tar" : "mapping-109" ,
19
+ "mapping_00229-model.pth.tar" : "mapping-229" ,
20
+ }
21
+
22
+ if not os.path.isdir(current_dir):
23
+ return False
24
+
25
+ dirs = os.listdir(current_dir)
26
+
27
+ for f in dirs:
28
+ if f in kv.keys():
29
+ del kv[f]
30
+
31
+ return len(kv.keys()) == 0
32
+
33
+ def check_all_files(current_dir):
34
+ kv = {
35
+ "auido2exp_00300-model.pth": "audio2exp",
36
+ "auido2pose_00140-model.pth": "audio2pose",
37
+ "epoch_20.pth": "face_recon",
38
+ "facevid2vid_00189-model.pth.tar": "face-render",
39
+ "mapping_00109-model.pth.tar" : "mapping-109" ,
40
+ "mapping_00229-model.pth.tar" : "mapping-229" ,
41
+ "wav2lip.pth": "wav2lip",
42
+ "shape_predictor_68_face_landmarks.dat": "dlib",
43
+ }
44
+
45
+ if not os.path.isdir(current_dir):
46
+ return False
47
+
48
+ dirs = os.listdir(current_dir)
49
+
50
+ for f in dirs:
51
+ if f in kv.keys():
52
+ del kv[f]
53
+
54
+ return len(kv.keys()) == 0
55
+
56
+
57
+
58
+ def download_model(local_dir='./checkpoints'):
59
+ REPO_ID = 'vinthony/SadTalker'
60
+ snapshot_download(repo_id=REPO_ID, local_dir=local_dir, local_dir_use_symlinks=False)
61
+
62
+ def get_source_image(image):
63
+ return image
64
+
65
+ def get_img_from_txt2img(x):
66
+ talker_path = Path(paths.script_path) / "outputs"
67
+ imgs_from_txt_dir = str(talker_path / "txt2img-images/")
68
+ imgs = glob.glob(imgs_from_txt_dir+'/*/*.png')
69
+ imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_txt_dir, x)))
70
+ img_from_txt_path = os.path.join(imgs_from_txt_dir, imgs[-1])
71
+ return img_from_txt_path, img_from_txt_path
72
+
73
+ def get_img_from_img2img(x):
74
+ talker_path = Path(paths.script_path) / "outputs"
75
+ imgs_from_img_dir = str(talker_path / "img2img-images/")
76
+ imgs = glob.glob(imgs_from_img_dir+'/*/*.png')
77
+ imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_img_dir, x)))
78
+ img_from_img_path = os.path.join(imgs_from_img_dir, imgs[-1])
79
+ return img_from_img_path, img_from_img_path
80
+
81
+ def get_default_checkpoint_path():
82
+ # check the path of models/checkpoints and extensions/
83
+ checkpoint_path = Path(paths.script_path) / "models"/ "SadTalker"
84
+ extension_checkpoint_path = Path(paths.script_path) / "extensions"/ "SadTalker" / "checkpoints"
85
+
86
+ if check_all_files_safetensor(checkpoint_path):
87
+ # print('found sadtalker checkpoint in ' + str(checkpoint_path))
88
+ return checkpoint_path
89
+
90
+ if check_all_files_safetensor(extension_checkpoint_path):
91
+ # print('found sadtalker checkpoint in ' + str(extension_checkpoint_path))
92
+ return extension_checkpoint_path
93
+
94
+ if check_all_files(checkpoint_path):
95
+ # print('found sadtalker checkpoint in ' + str(checkpoint_path))
96
+ return checkpoint_path
97
+
98
+ if check_all_files(extension_checkpoint_path):
99
+ # print('found sadtalker checkpoint in ' + str(extension_checkpoint_path))
100
+ return extension_checkpoint_path
101
+
102
+ return None
103
+
104
+
105
+
106
+ def install():
107
+
108
+ kv = {
109
+ "face_alignment": "face-alignment==1.3.5",
110
+ "imageio": "imageio==2.19.3",
111
+ "imageio_ffmpeg": "imageio-ffmpeg==0.4.7",
112
+ "librosa":"librosa==0.8.0",
113
+ "pydub":"pydub==0.25.1",
114
+ "scipy":"scipy==1.8.1",
115
+ "tqdm": "tqdm",
116
+ "yacs":"yacs==0.1.8",
117
+ "yaml": "pyyaml",
118
+ "av":"av",
119
+ "gfpgan": "gfpgan",
120
+ }
121
+
122
+ # # dlib is not necessary currently
123
+ # if 'darwin' in sys.platform:
124
+ # kv['dlib'] = "dlib"
125
+ # else:
126
+ # kv['dlib'] = 'dlib-bin'
127
+
128
+ # #### we need to have a newer version of imageio for our method.
129
+ # launch.run_pip("install imageio==2.19.3", "requirements for SadTalker")
130
+
131
+ for k,v in kv.items():
132
+ if not launch.is_installed(k):
133
+ print(k, launch.is_installed(k))
134
+ launch.run_pip("install "+ v, "requirements for SadTalker")
135
+
136
+ if os.getenv('SADTALKER_CHECKPOINTS'):
137
+ print('load Sadtalker Checkpoints from '+ os.getenv('SADTALKER_CHECKPOINTS'))
138
+
139
+ elif get_default_checkpoint_path() is not None:
140
+ os.environ['SADTALKER_CHECKPOINTS'] = str(get_default_checkpoint_path())
141
+ else:
142
+
143
+ print(
144
+ """"
145
+ SadTalker will not support download all the files from hugging face, which will take a long time.
146
+
147
+ please manually set the SADTALKER_CHECKPOINTS in `webui_user.bat`(windows) or `webui_user.sh`(linux)
148
+ """
149
+ )
150
+
151
+ # python = sys.executable
152
+
153
+ # launch.run(f'"{python}" -m pip uninstall -y huggingface_hub', live=True)
154
+ # launch.run(f'"{python}" -m pip install --upgrade git+https://github.com/huggingface/huggingface_hub@main', live=True)
155
+ # ### run the scripts to download models to correct location.
156
+ # # print('download models for SadTalker')
157
+ # # launch.run("cd " + paths.script_path+"/extensions/SadTalker && bash ./scripts/download_models.sh", live=True)
158
+ # # print('SadTalker is successfully installed!')
159
+ # download_model(paths.script_path+'/extensions/SadTalker/checkpoints')
160
+
161
+
162
+ def on_ui_tabs():
163
+ install()
164
+
165
+ sys.path.extend([paths.script_path+'/extensions/SadTalker'])
166
+
167
+ repo_dir = paths.script_path+'/extensions/SadTalker/'
168
+
169
+ result_dir = opts.sadtalker_result_dir
170
+ os.makedirs(result_dir, exist_ok=True)
171
+
172
+ from app_sadtalker import sadtalker_demo
173
+
174
+ if os.getenv('SADTALKER_CHECKPOINTS'):
175
+ checkpoint_path = os.getenv('SADTALKER_CHECKPOINTS')
176
+ else:
177
+ checkpoint_path = repo_dir+'checkpoints/'
178
+
179
+ audio_to_video = sadtalker_demo(checkpoint_path=checkpoint_path, config_path=repo_dir+'src/config', warpfn = wrap_queued_call)
180
+
181
+ return [(audio_to_video, "SadTalker", "extension")]
182
+
183
+ def on_ui_settings():
184
+ talker_path = Path(paths.script_path) / "outputs"
185
+ section = ('extension', "SadTalker")
186
+ opts.add_option("sadtalker_result_dir", OptionInfo(str(talker_path / "SadTalker/"), "Path to save results of sadtalker", section=section))
187
+
188
+ script_callbacks.on_ui_settings(on_ui_settings)
189
+ script_callbacks.on_ui_tabs(on_ui_tabs)
scripts/test.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ### some test commands before commit.
2
+ # python inference.py --preprocess crop --size 256
3
+ # python inference.py --preprocess crop --size 512
4
+
5
+ # python inference.py --preprocess extcrop --size 256
6
+ # python inference.py --preprocess extcrop --size 512
7
+
8
+ # python inference.py --preprocess resize --size 256
9
+ # python inference.py --preprocess resize --size 512
10
+
11
+ # python inference.py --preprocess full --size 256
12
+ # python inference.py --preprocess full --size 512
13
+
14
+ # python inference.py --preprocess extfull --size 256
15
+ # python inference.py --preprocess extfull --size 512
16
+
17
+ python inference.py --preprocess full --size 256 --enhancer gfpgan
18
+ python inference.py --preprocess full --size 512 --enhancer gfpgan
19
+
20
+ python inference.py --preprocess full --size 256 --enhancer gfpgan --still
21
+ python inference.py --preprocess full --size 512 --enhancer gfpgan --still
src/gradio_demo.py CHANGED
@@ -28,6 +28,7 @@ class SadTalker():
28
  else:
29
  device = "cpu"
30
 
 
31
  self.device = device
32
 
33
  os.environ['TORCH_HOME']= checkpoint_path
 
28
  else:
29
  device = "cpu"
30
 
31
+ print(f'Using device: {device}')
32
  self.device = device
33
 
34
  os.environ['TORCH_HOME']= checkpoint_path
webui.bat ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @echo off
2
+
3
+ IF NOT EXIST venv (
4
+ python -m venv venv
5
+ ) ELSE (
6
+ echo venv folder already exists, skipping creation...
7
+ )
8
+ call .\venv\Scripts\activate.bat
9
+
10
+ set PYTHON="venv\Scripts\Python.exe"
11
+ echo venv %PYTHON%
12
+
13
+ %PYTHON% App.py
14
+
15
+ echo.
16
+ echo Launch unsuccessful. Exiting.
17
+ pause