lazhrach commited on
Commit
d05c508
1 Parent(s): c275b63

Updated packages and requirements, added device print statement, modified README, added webui.bat and test.sh scripts, and downloaded new models and enhancer weights.

Browse files
Files changed (10) hide show
  1. .gitattributes +18 -1
  2. README.md +5 -4
  3. app.py +102 -102
  4. packages.txt +2 -1
  5. requirements.txt +2 -1
  6. scripts/download_models.sh +32 -0
  7. scripts/extension.py +189 -0
  8. scripts/test.sh +21 -0
  9. src/gradio_demo.py +1 -0
  10. webui.bat +17 -0
.gitattributes CHANGED
@@ -25,7 +25,6 @@
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,21 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
28
  *.tflite filter=lfs diff=lfs merge=lfs -text
29
  *.tgz filter=lfs diff=lfs merge=lfs -text
30
  *.wasm filter=lfs diff=lfs merge=lfs -text
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ checkpoints/BFM_Fitting/01_MorphableModel.mat filter=lfs diff=lfs merge=lfs -text
36
+ checkpoints/BFM_Fitting/BFM09_model_info.mat filter=lfs diff=lfs merge=lfs -text
37
+ checkpoints/facevid2vid_00189-model.pth.tar filter=lfs diff=lfs merge=lfs -text
38
+ checkpoints/mapping_00229-model.pth.tar filter=lfs diff=lfs merge=lfs -text
39
+ checkpoints/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
40
+ examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text
41
+ examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text
42
+ examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text
43
+ examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text
44
+ examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text
45
+ examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text
46
+ examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text
47
+ examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text
48
+ examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text
49
+ examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text
50
+ examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text
51
+ examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text
52
+ examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: AvatarTest
3
- emoji: 🔥
4
- colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: AvatarTest
3
+ emoji: 😭
4
+ colorFrom: purple
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.23.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import os, sys
 
2
  import tempfile
 
3
  import gradio as gr
4
  from src.gradio_demo import SadTalker
5
  # from src.utils.text2speech import TTSTalker
@@ -28,8 +30,88 @@ def ref_video_fn(path_of_ref_video):
28
  return gr.update(value=False)
29
 
30
  def download_model():
31
- REPO_ID = 'vinthony/SadTalker-V002rc'
32
- snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def sadtalker_demo():
35
 
@@ -38,41 +120,37 @@ def sadtalker_demo():
38
  sad_talker = SadTalker(lazy_load=True)
39
  # tts_talker = TTSTalker()
40
 
41
- with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
42
- gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
43
- <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
44
- <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
45
- <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
46
-
47
-
48
- gr.Markdown("""
49
- <b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> \
50
- <br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> \
51
- """)
52
-
53
- with gr.Row().style(equal_height=False):
54
  with gr.Column(variant='panel'):
55
  with gr.Tabs(elem_id="sadtalker_source_image"):
56
  with gr.TabItem('Source image'):
57
  with gr.Row():
58
- source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
59
 
60
 
61
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
62
  with gr.TabItem('Driving Methods'):
63
- gr.Markdown("Possible driving combinations: <br> 1. Audio only 2. Audio/IDLE Mode + Ref Video(pose, blink, pose+blink) 3. IDLE Mode only 4. Ref Video only (all) ")
64
-
65
  with gr.Row():
66
  driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
67
  driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
68
 
69
- with gr.Column():
70
- use_idle_mode = gr.Checkbox(label="Use Idle Animation")
71
  length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
72
  use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
73
-
74
  with gr.Row():
75
- ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref").style(width=512)
 
 
 
 
 
 
 
 
 
 
76
 
77
  with gr.Column():
78
  use_ref_video = gr.Checkbox(label="Use Reference Video")
@@ -84,7 +162,6 @@ def sadtalker_demo():
84
  with gr.Column(variant='panel'):
85
  with gr.Tabs(elem_id="sadtalker_checkbox"):
86
  with gr.TabItem('Settings'):
87
- gr.Markdown("need help? please visit our [[best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md)] for more detials")
88
  with gr.Column(variant='panel'):
89
  # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
90
  # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
@@ -108,7 +185,7 @@ def sadtalker_demo():
108
  submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
109
 
110
  with gr.Tabs(elem_id="sadtalker_genearted"):
111
- gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
112
 
113
 
114
 
@@ -132,84 +209,7 @@ def sadtalker_demo():
132
  blink_every
133
  ],
134
  outputs=[gen_video]
135
- )
136
-
137
- with gr.Row():
138
- examples = [
139
- [
140
- 'examples/source_image/full_body_1.png',
141
- 'examples/driven_audio/bus_chinese.wav',
142
- 'crop',
143
- True,
144
- False
145
- ],
146
- [
147
- 'examples/source_image/full_body_2.png',
148
- 'examples/driven_audio/japanese.wav',
149
- 'crop',
150
- False,
151
- False
152
- ],
153
- [
154
- 'examples/source_image/full3.png',
155
- 'examples/driven_audio/deyu.wav',
156
- 'crop',
157
- False,
158
- True
159
- ],
160
- [
161
- 'examples/source_image/full4.jpeg',
162
- 'examples/driven_audio/eluosi.wav',
163
- 'full',
164
- False,
165
- True
166
- ],
167
- [
168
- 'examples/source_image/full4.jpeg',
169
- 'examples/driven_audio/imagine.wav',
170
- 'full',
171
- True,
172
- True
173
- ],
174
- [
175
- 'examples/source_image/full_body_1.png',
176
- 'examples/driven_audio/bus_chinese.wav',
177
- 'full',
178
- True,
179
- False
180
- ],
181
- [
182
- 'examples/source_image/art_13.png',
183
- 'examples/driven_audio/fayu.wav',
184
- 'resize',
185
- True,
186
- False
187
- ],
188
- [
189
- 'examples/source_image/art_5.png',
190
- 'examples/driven_audio/chinese_news.wav',
191
- 'resize',
192
- False,
193
- False
194
- ],
195
- [
196
- 'examples/source_image/art_5.png',
197
- 'examples/driven_audio/RD_Radio31_000.wav',
198
- 'resize',
199
- True,
200
- True
201
- ],
202
- ]
203
- gr.Examples(examples=examples,
204
- inputs=[
205
- source_image,
206
- driven_audio,
207
- preprocess_type,
208
- is_still_mode,
209
- enhancer],
210
- outputs=[gen_video],
211
- fn=sad_talker.test,
212
- cache_examples=os.getenv('SYSTEM') == 'spaces') #
213
 
214
  return sadtalker_interface
215
 
 
1
  import os, sys
2
+ import uuid
3
  import tempfile
4
+ import pyttsx3
5
  import gradio as gr
6
  from src.gradio_demo import SadTalker
7
  # from src.utils.text2speech import TTSTalker
 
30
  return gr.update(value=False)
31
 
32
  def download_model():
33
+ REPO_ID = 'vinthony/SadTalker-V002rc'
34
+ snapshot_download(REPO_ID)
35
+
36
+ # language : en_US, de_DE, ...
37
+ # gender : VoiceGenderFemale, VoiceGenderMale
38
+ def change_voice(engine, language='ru_ru', gender='male'):
39
+
40
+ selected_voices = []
41
+
42
+ language = language.lower() if language else ''
43
+ gender = gender.lower() if gender else ''
44
+
45
+ for voice in engine.getProperty('voices'):
46
+ voice_appended = False
47
+
48
+ for lang in voice.languages:
49
+
50
+ lang_str = str(lang, 'utf-8')
51
+ print("lang", lang_str)
52
+
53
+ if lang_str and language in lang_str.lower():
54
+ selected_voices.append(voice)
55
+ print("voice appended by lang", voice, lang_str)
56
+ voice_appended = True
57
+ break
58
+
59
+ if voice_appended:
60
+ continue
61
+
62
+ if voice.id and language in voice.id.lower():
63
+ selected_voices.append(voice)
64
+ print("voice appended by id", voice.id)
65
+ continue
66
+
67
+ if voice.name and language in voice.name.lower():
68
+ selected_voices.append(voice)
69
+ print("voice appended by name", voice.name)
70
+ continue
71
+
72
+ for voice in selected_voices:
73
+ if voice.gender and gender in voice.gender.lower():
74
+ engine.setProperty('voice', voice.id)
75
+ print("voice selected by gender", voice.gender)
76
+ return True
77
+
78
+ if voice.id and gender in voice.id.lower():
79
+ engine.setProperty('voice', voice.id)
80
+ print("voice selected by id", voice.id)
81
+ return True
82
+ if voice.name and gender in voice.name.lower():
83
+ engine.setProperty('voice', voice.id)
84
+ print("voice selected by name", voice.name)
85
+ return True
86
+
87
+ if len(selected_voices) > 0:
88
+ engine.setProperty('voice', selected_voices[0].id)
89
+ print("voice selected by default", selected_voices[0].id)
90
+ return True
91
+
92
+ return False
93
+
94
+ def play_text_to_speech(text_input, voice_option):
95
+ engine = pyttsx3.init()
96
+
97
+ change_voice(engine, 'ru', voice_option)
98
+
99
+ print("text_input", text_input)
100
+ print("voice_option", voice_option)
101
+
102
+ time_tag = str(uuid.uuid4())
103
+ save_dir = './results/voice_input'
104
+ os.makedirs(save_dir, exist_ok=True)
105
+ file_name = os.path.join(save_dir, os.path.basename(time_tag + '.wav'))
106
+
107
+ open(file_name, "wb").close()
108
+ engine.say(text_input)
109
+ engine.save_to_file(text_input, file_name)
110
+ engine.runAndWait()
111
+
112
+ print("file saved to", file_name)
113
+
114
+ return file_name
115
 
116
  def sadtalker_demo():
117
 
 
120
  sad_talker = SadTalker(lazy_load=True)
121
  # tts_talker = TTSTalker()
122
 
123
+ with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
124
+ with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
125
  with gr.Column(variant='panel'):
126
  with gr.Tabs(elem_id="sadtalker_source_image"):
127
  with gr.TabItem('Source image'):
128
  with gr.Row():
129
+ source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image")
130
 
131
 
132
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
133
  with gr.TabItem('Driving Methods'):
 
 
134
  with gr.Row():
135
  driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
136
  driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
137
 
138
+ with gr.Column(visible=False):
139
+ use_idle_mode = gr.Checkbox(label="Use Idle Animation", visible=False)
140
  length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
141
  use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
 
142
  with gr.Row():
143
+ text_input = gr.Textbox(label="Enter text", multiline=True)
144
+ voice_option = gr.Radio(['Male', 'Female'], label='Voice Option', value='Female')
145
+ with gr.Row():
146
+ play_button = gr.Button('Text To Speech', variant='primary')
147
+ play_button.click(
148
+ fn=play_text_to_speech,
149
+ inputs=[text_input, voice_option],
150
+ outputs=[driven_audio]
151
+ )
152
+ with gr.Row():
153
+ ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref")
154
 
155
  with gr.Column():
156
  use_ref_video = gr.Checkbox(label="Use Reference Video")
 
162
  with gr.Column(variant='panel'):
163
  with gr.Tabs(elem_id="sadtalker_checkbox"):
164
  with gr.TabItem('Settings'):
 
165
  with gr.Column(variant='panel'):
166
  # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
167
  # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
 
185
  submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
186
 
187
  with gr.Tabs(elem_id="sadtalker_genearted"):
188
+ gen_video = gr.Video(label="Generated video", format="mp4")
189
 
190
 
191
 
 
209
  blink_every
210
  ],
211
  outputs=[gen_video]
212
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  return sadtalker_interface
215
 
packages.txt CHANGED
@@ -1,2 +1,3 @@
1
  ffmpeg
2
- libsndfile1
 
 
1
  ffmpeg
2
+ libsndfile1
3
+ libespeak1
requirements.txt CHANGED
@@ -21,4 +21,5 @@ facexlib==0.3.0
21
  dlib-bin
22
  gfpgan
23
  av
24
- safetensors
 
 
21
  dlib-bin
22
  gfpgan
23
  av
24
+ safetensors
25
+ pyttsx3==2.90
scripts/download_models.sh ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mkdir ./checkpoints
2
+
3
+ # legacy download link
4
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/auido2exp_00300-model.pth -O ./checkpoints/auido2exp_00300-model.pth
5
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/auido2pose_00140-model.pth -O ./checkpoints/auido2pose_00140-model.pth
6
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/epoch_20.pth -O ./checkpoints/epoch_20.pth
7
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/facevid2vid_00189-model.pth.tar -O ./checkpoints/facevid2vid_00189-model.pth.tar
8
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/shape_predictor_68_face_landmarks.dat -O ./checkpoints/shape_predictor_68_face_landmarks.dat
9
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/wav2lip.pth -O ./checkpoints/wav2lip.pth
10
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar
11
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/mapping_00109-model.pth.tar -O ./checkpoints/mapping_00109-model.pth.tar
12
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/hub.zip -O ./checkpoints/hub.zip
13
+ # unzip -n ./checkpoints/hub.zip -d ./checkpoints/
14
+
15
+
16
+ #### download the new links.
17
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/mapping_00109-model.pth.tar -O ./checkpoints/mapping_00109-model.pth.tar
18
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/mapping_00229-model.pth.tar -O ./checkpoints/mapping_00229-model.pth.tar
19
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/SadTalker_V0.0.2_256.safetensors -O ./checkpoints/SadTalker_V0.0.2_256.safetensors
20
+ wget -nc https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/SadTalker_V0.0.2_512.safetensors -O ./checkpoints/SadTalker_V0.0.2_512.safetensors
21
+
22
+
23
+ # wget -nc https://github.com/Winfredy/SadTalker/releases/download/v0.0.2/BFM_Fitting.zip -O ./checkpoints/BFM_Fitting.zip
24
+ # unzip -n ./checkpoints/BFM_Fitting.zip -d ./checkpoints/
25
+
26
+ ### enhancer
27
+ mkdir -p ./gfpgan/weights
28
+ wget -nc https://github.com/xinntao/facexlib/releases/download/v0.1.0/alignment_WFLW_4HG.pth -O ./gfpgan/weights/alignment_WFLW_4HG.pth
29
+ wget -nc https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth -O ./gfpgan/weights/detection_Resnet50_Final.pth
30
+ wget -nc https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -O ./gfpgan/weights/GFPGANv1.4.pth
31
+ wget -nc https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth -O ./gfpgan/weights/parsing_parsenet.pth
32
+
scripts/extension.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ from pathlib import Path
3
+ import tempfile
4
+ import gradio as gr
5
+ from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call
6
+ from modules.shared import opts, OptionInfo
7
+ from modules import shared, paths, script_callbacks
8
+ import launch
9
+ import glob
10
+ from huggingface_hub import snapshot_download
11
+
12
+
13
+
14
+ def check_all_files_safetensor(current_dir):
15
+ kv = {
16
+ "SadTalker_V0.0.2_256.safetensors": "sadtalker-256",
17
+ "SadTalker_V0.0.2_512.safetensors": "sadtalker-512",
18
+ "mapping_00109-model.pth.tar" : "mapping-109" ,
19
+ "mapping_00229-model.pth.tar" : "mapping-229" ,
20
+ }
21
+
22
+ if not os.path.isdir(current_dir):
23
+ return False
24
+
25
+ dirs = os.listdir(current_dir)
26
+
27
+ for f in dirs:
28
+ if f in kv.keys():
29
+ del kv[f]
30
+
31
+ return len(kv.keys()) == 0
32
+
33
+ def check_all_files(current_dir):
34
+ kv = {
35
+ "auido2exp_00300-model.pth": "audio2exp",
36
+ "auido2pose_00140-model.pth": "audio2pose",
37
+ "epoch_20.pth": "face_recon",
38
+ "facevid2vid_00189-model.pth.tar": "face-render",
39
+ "mapping_00109-model.pth.tar" : "mapping-109" ,
40
+ "mapping_00229-model.pth.tar" : "mapping-229" ,
41
+ "wav2lip.pth": "wav2lip",
42
+ "shape_predictor_68_face_landmarks.dat": "dlib",
43
+ }
44
+
45
+ if not os.path.isdir(current_dir):
46
+ return False
47
+
48
+ dirs = os.listdir(current_dir)
49
+
50
+ for f in dirs:
51
+ if f in kv.keys():
52
+ del kv[f]
53
+
54
+ return len(kv.keys()) == 0
55
+
56
+
57
+
58
+ def download_model(local_dir='./checkpoints'):
59
+ REPO_ID = 'vinthony/SadTalker'
60
+ snapshot_download(repo_id=REPO_ID, local_dir=local_dir, local_dir_use_symlinks=False)
61
+
62
+ def get_source_image(image):
63
+ return image
64
+
65
+ def get_img_from_txt2img(x):
66
+ talker_path = Path(paths.script_path) / "outputs"
67
+ imgs_from_txt_dir = str(talker_path / "txt2img-images/")
68
+ imgs = glob.glob(imgs_from_txt_dir+'/*/*.png')
69
+ imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_txt_dir, x)))
70
+ img_from_txt_path = os.path.join(imgs_from_txt_dir, imgs[-1])
71
+ return img_from_txt_path, img_from_txt_path
72
+
73
+ def get_img_from_img2img(x):
74
+ talker_path = Path(paths.script_path) / "outputs"
75
+ imgs_from_img_dir = str(talker_path / "img2img-images/")
76
+ imgs = glob.glob(imgs_from_img_dir+'/*/*.png')
77
+ imgs.sort(key=lambda x:os.path.getmtime(os.path.join(imgs_from_img_dir, x)))
78
+ img_from_img_path = os.path.join(imgs_from_img_dir, imgs[-1])
79
+ return img_from_img_path, img_from_img_path
80
+
81
+ def get_default_checkpoint_path():
82
+ # check the path of models/checkpoints and extensions/
83
+ checkpoint_path = Path(paths.script_path) / "models"/ "SadTalker"
84
+ extension_checkpoint_path = Path(paths.script_path) / "extensions"/ "SadTalker" / "checkpoints"
85
+
86
+ if check_all_files_safetensor(checkpoint_path):
87
+ # print('found sadtalker checkpoint in ' + str(checkpoint_path))
88
+ return checkpoint_path
89
+
90
+ if check_all_files_safetensor(extension_checkpoint_path):
91
+ # print('found sadtalker checkpoint in ' + str(extension_checkpoint_path))
92
+ return extension_checkpoint_path
93
+
94
+ if check_all_files(checkpoint_path):
95
+ # print('found sadtalker checkpoint in ' + str(checkpoint_path))
96
+ return checkpoint_path
97
+
98
+ if check_all_files(extension_checkpoint_path):
99
+ # print('found sadtalker checkpoint in ' + str(extension_checkpoint_path))
100
+ return extension_checkpoint_path
101
+
102
+ return None
103
+
104
+
105
+
106
+ def install():
107
+
108
+ kv = {
109
+ "face_alignment": "face-alignment==1.3.5",
110
+ "imageio": "imageio==2.19.3",
111
+ "imageio_ffmpeg": "imageio-ffmpeg==0.4.7",
112
+ "librosa":"librosa==0.8.0",
113
+ "pydub":"pydub==0.25.1",
114
+ "scipy":"scipy==1.8.1",
115
+ "tqdm": "tqdm",
116
+ "yacs":"yacs==0.1.8",
117
+ "yaml": "pyyaml",
118
+ "av":"av",
119
+ "gfpgan": "gfpgan",
120
+ }
121
+
122
+ # # dlib is not necessary currently
123
+ # if 'darwin' in sys.platform:
124
+ # kv['dlib'] = "dlib"
125
+ # else:
126
+ # kv['dlib'] = 'dlib-bin'
127
+
128
+ # #### we need to have a newer version of imageio for our method.
129
+ # launch.run_pip("install imageio==2.19.3", "requirements for SadTalker")
130
+
131
+ for k,v in kv.items():
132
+ if not launch.is_installed(k):
133
+ print(k, launch.is_installed(k))
134
+ launch.run_pip("install "+ v, "requirements for SadTalker")
135
+
136
+ if os.getenv('SADTALKER_CHECKPOINTS'):
137
+ print('load Sadtalker Checkpoints from '+ os.getenv('SADTALKER_CHECKPOINTS'))
138
+
139
+ elif get_default_checkpoint_path() is not None:
140
+ os.environ['SADTALKER_CHECKPOINTS'] = str(get_default_checkpoint_path())
141
+ else:
142
+
143
+ print(
144
+ """"
145
+ SadTalker will not support download all the files from hugging face, which will take a long time.
146
+
147
+ please manually set the SADTALKER_CHECKPOINTS in `webui_user.bat`(windows) or `webui_user.sh`(linux)
148
+ """
149
+ )
150
+
151
+ # python = sys.executable
152
+
153
+ # launch.run(f'"{python}" -m pip uninstall -y huggingface_hub', live=True)
154
+ # launch.run(f'"{python}" -m pip install --upgrade git+https://github.com/huggingface/huggingface_hub@main', live=True)
155
+ # ### run the scripts to download models to correct location.
156
+ # # print('download models for SadTalker')
157
+ # # launch.run("cd " + paths.script_path+"/extensions/SadTalker && bash ./scripts/download_models.sh", live=True)
158
+ # # print('SadTalker is successfully installed!')
159
+ # download_model(paths.script_path+'/extensions/SadTalker/checkpoints')
160
+
161
+
162
+ def on_ui_tabs():
163
+ install()
164
+
165
+ sys.path.extend([paths.script_path+'/extensions/SadTalker'])
166
+
167
+ repo_dir = paths.script_path+'/extensions/SadTalker/'
168
+
169
+ result_dir = opts.sadtalker_result_dir
170
+ os.makedirs(result_dir, exist_ok=True)
171
+
172
+ from app_sadtalker import sadtalker_demo
173
+
174
+ if os.getenv('SADTALKER_CHECKPOINTS'):
175
+ checkpoint_path = os.getenv('SADTALKER_CHECKPOINTS')
176
+ else:
177
+ checkpoint_path = repo_dir+'checkpoints/'
178
+
179
+ audio_to_video = sadtalker_demo(checkpoint_path=checkpoint_path, config_path=repo_dir+'src/config', warpfn = wrap_queued_call)
180
+
181
+ return [(audio_to_video, "SadTalker", "extension")]
182
+
183
+ def on_ui_settings():
184
+ talker_path = Path(paths.script_path) / "outputs"
185
+ section = ('extension', "SadTalker")
186
+ opts.add_option("sadtalker_result_dir", OptionInfo(str(talker_path / "SadTalker/"), "Path to save results of sadtalker", section=section))
187
+
188
+ script_callbacks.on_ui_settings(on_ui_settings)
189
+ script_callbacks.on_ui_tabs(on_ui_tabs)
scripts/test.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ### some test commands before commit.
2
+ # python inference.py --preprocess crop --size 256
3
+ # python inference.py --preprocess crop --size 512
4
+
5
+ # python inference.py --preprocess extcrop --size 256
6
+ # python inference.py --preprocess extcrop --size 512
7
+
8
+ # python inference.py --preprocess resize --size 256
9
+ # python inference.py --preprocess resize --size 512
10
+
11
+ # python inference.py --preprocess full --size 256
12
+ # python inference.py --preprocess full --size 512
13
+
14
+ # python inference.py --preprocess extfull --size 256
15
+ # python inference.py --preprocess extfull --size 512
16
+
17
+ python inference.py --preprocess full --size 256 --enhancer gfpgan
18
+ python inference.py --preprocess full --size 512 --enhancer gfpgan
19
+
20
+ python inference.py --preprocess full --size 256 --enhancer gfpgan --still
21
+ python inference.py --preprocess full --size 512 --enhancer gfpgan --still
src/gradio_demo.py CHANGED
@@ -28,6 +28,7 @@ class SadTalker():
28
  else:
29
  device = "cpu"
30
 
 
31
  self.device = device
32
 
33
  os.environ['TORCH_HOME']= checkpoint_path
 
28
  else:
29
  device = "cpu"
30
 
31
+ print(f'Using device: {device}')
32
  self.device = device
33
 
34
  os.environ['TORCH_HOME']= checkpoint_path
webui.bat ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @echo off
2
+
3
+ IF NOT EXIST venv (
4
+ python -m venv venv
5
+ ) ELSE (
6
+ echo venv folder already exists, skipping creation...
7
+ )
8
+ call .\venv\Scripts\activate.bat
9
+
10
+ set PYTHON="venv\Scripts\Python.exe"
11
+ echo venv %PYTHON%
12
+
13
+ %PYTHON% App.py
14
+
15
+ echo.
16
+ echo Launch unsuccessful. Exiting.
17
+ pause