Spaces:

lazhrach
/

AvatarTest

Runtime error

App Files Files

lazhrach committed on Mar 6

Commit

c275b63

•

1 Parent(s): a7a3eb8

Initial commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +159 -0
Dockerfile +59 -0
LICENSE +21 -0
app.py +223 -0
docs/sadtalker_logo.png +0 -0
examples/driven_audio/RD_Radio31_000.wav +0 -0
examples/driven_audio/RD_Radio34_002.wav +0 -0
examples/driven_audio/RD_Radio36_000.wav +0 -0
examples/driven_audio/RD_Radio40_000.wav +0 -0
examples/driven_audio/bus_chinese.wav +0 -0
examples/driven_audio/chinese_news.wav +3 -0
examples/driven_audio/chinese_poem1.wav +0 -0
examples/driven_audio/chinese_poem2.wav +0 -0
examples/driven_audio/deyu.wav +3 -0
examples/driven_audio/eluosi.wav +3 -0
examples/driven_audio/fayu.wav +3 -0
examples/driven_audio/imagine.wav +3 -0
examples/driven_audio/itosinger1.wav +0 -0
examples/driven_audio/japanese.wav +3 -0
examples/source_image/art_0.png +0 -0
examples/source_image/art_1.png +0 -0
examples/source_image/art_10.png +0 -0
examples/source_image/art_11.png +0 -0
examples/source_image/art_12.png +0 -0
examples/source_image/art_13.png +0 -0
examples/source_image/art_14.png +0 -0
examples/source_image/art_15.png +0 -0
examples/source_image/art_16.png +3 -0
examples/source_image/art_17.png +3 -0
examples/source_image/art_18.png +0 -0
examples/source_image/art_19.png +0 -0
examples/source_image/art_2.png +0 -0
examples/source_image/art_20.png +0 -0
examples/source_image/art_3.png +3 -0
examples/source_image/art_4.png +3 -0
examples/source_image/art_5.png +3 -0
examples/source_image/art_6.png +0 -0
examples/source_image/art_7.png +0 -0
examples/source_image/art_8.png +3 -0
examples/source_image/art_9.png +3 -0
examples/source_image/full3.png +0 -0
examples/source_image/full4.jpeg +0 -0
examples/source_image/full_body_1.png +0 -0
examples/source_image/full_body_2.png +0 -0
examples/source_image/happy.png +0 -0
examples/source_image/happy1.png +0 -0
examples/source_image/people_0.png +0 -0
examples/source_image/sad.png +0 -0
examples/source_image/sad1.png +0 -0
packages.txt +2 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,159 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+results/
+checkpoints/
+gradio_cached_examples/
+gfpgan/
+start.sh

Dockerfile ADDED Viewed

	@@ -0,0 +1,59 @@

+FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+# Build-time only: stops apt from prompting without leaving the variable in the
+# environment of the running container.
+ARG DEBIAN_FRONTEND=noninteractive
+# System layer: general tooling, ffmpeg, and the libraries needed to compile
+# CPython (pyenv builds the interpreter from source further down).
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    zip \
+    unzip \
+    git-lfs \
+    wget \
+    curl \
+    # ffmpeg \
+    ffmpeg \
+    x264 \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Everything from here on runs as an unprivileged user with UID 1000.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:${PATH}
+WORKDIR ${HOME}/app
+# With pipefail a failed download aborts the build instead of piping an empty
+# or partial script into bash; -f makes curl fail on HTTP errors.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+RUN curl -fsSL https://pyenv.run | bash
+ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ENV PYTHON_VERSION=3.10.9
+RUN pyenv install ${PYTHON_VERSION} && \
+    pyenv global ${PYTHON_VERSION} && \
+    pyenv rehash && \
+    pip install --no-cache-dir -U pip setuptools wheel
+# torch is installed in its own layer, before the requirements file is copied in.
+RUN pip install --no-cache-dir -U torch==1.12.1 torchvision==0.13.1
+# Copy only the requirements file first so the dependency layer is reused
+# when just the application source changes.
+COPY --chown=1000 requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
+COPY --chown=1000 . ${HOME}/app
+# Runtime configuration; app.py checks SYSTEM to decide whether to cache examples.
+ENV PYTHONPATH=${HOME}/app \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    SYSTEM=spaces
+CMD ["python", "app.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Tencent AI Lab
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

app.py ADDED Viewed

	@@ -0,0 +1,223 @@

+import os, sys
+import tempfile
+import gradio as gr
+from src.gradio_demo import SadTalker
+# from src.utils.text2speech import TTSTalker
+from huggingface_hub import snapshot_download
+def get_source_image(image):
+        return image
+try:
+    import webui  # in webui
+    in_webui = True
+except:
+    in_webui = False
+def toggle_audio_file(choice):
+    if choice == False:
+        return gr.update(visible=True), gr.update(visible=False)
+    else:
+        return gr.update(visible=False), gr.update(visible=True)
+def ref_video_fn(path_of_ref_video):
+    if path_of_ref_video is not None:
+        return gr.update(value=True)
+    else:
+        return gr.update(value=False)
+def download_model():
+    REPO_ID = 'vinthony/SadTalker-V002rc'
+    snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
+def sadtalker_demo():
+    download_model()
+    sad_talker = SadTalker(lazy_load=True)
+    # tts_talker = TTSTalker()
+    with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
+        gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
+                    <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
+                    <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a>  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
+                     <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
+        gr.Markdown("""
+        <b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> \
+        <br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> \
+        """)
+        with gr.Row().style(equal_height=False):
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="sadtalker_source_image"):
+                    with gr.TabItem('Source image'):
+                        with gr.Row():
+                            source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
+                with gr.Tabs(elem_id="sadtalker_driven_audio"):
+                    with gr.TabItem('Driving Methods'):
+                        gr.Markdown("Possible driving combinations: <br> 1. Audio only 2. Audio/IDLE Mode + Ref Video(pose, blink, pose+blink) 3. IDLE Mode only 4. Ref Video only (all) ")
+                        with gr.Row():
+                            driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
+                            driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
+                            with gr.Column():
+                                use_idle_mode = gr.Checkbox(label="Use Idle Animation")
+                                length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
+                                use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
+                        with gr.Row():
+                            ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref").style(width=512)
+                            with gr.Column():
+                                use_ref_video = gr.Checkbox(label="Use Reference Video")
+                                ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Reference Video',info="How to borrow from reference Video?((fully transfer, aka, video driving mode))")
+                            ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video]) # todo
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="sadtalker_checkbox"):
+                    with gr.TabItem('Settings'):
+                        gr.Markdown("need help? please visit our [[best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md)] for more detials")
+                        with gr.Column(variant='panel'):
+                            # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
+                            # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
+                            with gr.Row():
+                                pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Pose style", value=0) #
+                                exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="expression scale", value=1) #
+                                blink_every = gr.Checkbox(label="use eye blink", value=True)
+                            with gr.Row():
+                                size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
+                                preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
+                            with gr.Row():
+                                is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
+                                facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='facerender', info="which face render?")
+                            with gr.Row():
+                                batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=1)
+                                enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
+                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
+                with gr.Tabs(elem_id="sadtalker_genearted"):
+                        gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
+        submit.click(
+                fn=sad_talker.test,
+                inputs=[source_image,
+                        driven_audio,
+                        preprocess_type,
+                        is_still_mode,
+                        enhancer,
+                        batch_size,
+                        size_of_image,
+                        pose_style,
+                        facerender,
+                        exp_weight,
+                        use_ref_video,
+                        ref_video,
+                        ref_info,
+                        use_idle_mode,
+                        length_of_audio,
+                        blink_every
+                        ],
+                outputs=[gen_video]
+                )
+        with gr.Row():
+            examples = [
+                [
+                    'examples/source_image/full_body_1.png',
+                    'examples/driven_audio/bus_chinese.wav',
+                    'crop',
+                    True,
+                    False
+                ],
+                [
+                    'examples/source_image/full_body_2.png',
+                    'examples/driven_audio/japanese.wav',
+                    'crop',
+                    False,
+                    False
+                ],
+                [
+                    'examples/source_image/full3.png',
+                    'examples/driven_audio/deyu.wav',
+                    'crop',
+                    False,
+                    True
+                ],
+                [
+                    'examples/source_image/full4.jpeg',
+                    'examples/driven_audio/eluosi.wav',
+                    'full',
+                    False,
+                    True
+                ],
+                [
+                    'examples/source_image/full4.jpeg',
+                    'examples/driven_audio/imagine.wav',
+                    'full',
+                    True,
+                    True
+                ],
+                [
+                    'examples/source_image/full_body_1.png',
+                    'examples/driven_audio/bus_chinese.wav',
+                    'full',
+                    True,
+                    False
+                ],
+                [
+                    'examples/source_image/art_13.png',
+                    'examples/driven_audio/fayu.wav',
+                    'resize',
+                    True,
+                    False
+                ],
+                [
+                    'examples/source_image/art_5.png',
+                    'examples/driven_audio/chinese_news.wav',
+                    'resize',
+                    False,
+                    False
+                ],
+                [
+                    'examples/source_image/art_5.png',
+                    'examples/driven_audio/RD_Radio31_000.wav',
+                    'resize',
+                    True,
+                    True
+                ],
+            ]
+            gr.Examples(examples=examples,
+                        inputs=[
+                            source_image,
+                            driven_audio,
+                            preprocess_type,
+                            is_still_mode,
+                            enhancer],
+                        outputs=[gen_video],
+                        fn=sad_talker.test,
+                        cache_examples=os.getenv('SYSTEM') == 'spaces') #
+    return sadtalker_interface
+# Script entry point: build the interface, enable a request queue capped at
+# 10 entries, then launch it with debug=True.
+if __name__ == "__main__":
+    demo = sadtalker_demo()
+    demo.queue(max_size=10)
+    demo.launch(debug=True)

docs/sadtalker_logo.png ADDED Viewed

examples/driven_audio/RD_Radio31_000.wav ADDED Viewed

Binary file (512 kB). View file

examples/driven_audio/RD_Radio34_002.wav ADDED Viewed

Binary file (512 kB). View file

examples/driven_audio/RD_Radio36_000.wav ADDED Viewed

Binary file (512 kB). View file

examples/driven_audio/RD_Radio40_000.wav ADDED Viewed

Binary file (512 kB). View file

examples/driven_audio/bus_chinese.wav ADDED Viewed

Binary file (652 kB). View file

examples/driven_audio/chinese_news.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b0f4d313a1ca671bc4831d60bcf0c12225efbffe6c0e93e54fbfe9bcd4021cb
+size 1536078

examples/driven_audio/chinese_poem1.wav ADDED Viewed

Binary file (263 kB). View file

examples/driven_audio/chinese_poem2.wav ADDED Viewed

Binary file (461 kB). View file

examples/driven_audio/deyu.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba1839c57770a2ab0b593ce814344bfd4d750da02acc9be9e8cf5b9113a0f88a
+size 2694784

examples/driven_audio/eluosi.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4a3593815dc7b68c256672baa61934c9479efa770af2065fb0886f02713606e
+size 1786672

examples/driven_audio/fayu.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16ebd13626ae4171030b4ea05cceef06078483c352e4b68d469fc2a52bfffceb
+size 1940428

examples/driven_audio/imagine.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2db410217e074d91ae6011e1c5dc0b94f02d05d381c50af8e54253eeacad17d2
+size 1618510

examples/driven_audio/itosinger1.wav ADDED Viewed

Binary file (500 kB). View file

examples/driven_audio/japanese.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3db5426d0b158799e2be4f609b11f75bfbd4affffe18e9a1c8e6f241fcdedcfc
+size 2622712

examples/source_image/art_0.png ADDED Viewed

examples/source_image/art_1.png ADDED Viewed

examples/source_image/art_10.png ADDED Viewed

examples/source_image/art_11.png ADDED Viewed

examples/source_image/art_12.png ADDED Viewed

examples/source_image/art_13.png ADDED Viewed

examples/source_image/art_14.png ADDED Viewed

examples/source_image/art_15.png ADDED Viewed

examples/source_image/art_16.png ADDED Viewed

Git LFS Details

SHA256: 3f6d350055eea3abe35ee3fe9df80dcd99d8edae66ef4fc20bf06168bf189f25
Pointer size: 132 Bytes
Size of remote file: 1.48 MB

examples/source_image/art_17.png ADDED Viewed

Git LFS Details

SHA256: 05747bb45dcf271d9bb24344bd1bce0e0746d24ce4e13545b27ad40b50c3bfe7
Pointer size: 132 Bytes
Size of remote file: 2.09 MB

examples/source_image/art_18.png ADDED Viewed

examples/source_image/art_19.png ADDED Viewed

examples/source_image/art_2.png ADDED Viewed

examples/source_image/art_20.png ADDED Viewed

examples/source_image/art_3.png ADDED Viewed

Git LFS Details

SHA256: 81be3a9cc605ab01cbf741330b406db5246e8bbbcb443ad43ffeca2ef161e005
Pointer size: 132 Bytes
Size of remote file: 1.35 MB

examples/source_image/art_4.png ADDED Viewed

Git LFS Details

SHA256: ab322220d8eab1bfefdaedea91ca5d08a34258c1ab1e585a9b1c85b32968f983
Pointer size: 132 Bytes
Size of remote file: 3.63 MB

examples/source_image/art_5.png ADDED Viewed

Git LFS Details

SHA256: 199217b4c839ed849577aedcad32f2bce934628b9783ba4654a93756b25e7896
Pointer size: 132 Bytes
Size of remote file: 1.23 MB

examples/source_image/art_6.png ADDED Viewed

examples/source_image/art_7.png ADDED Viewed

examples/source_image/art_8.png ADDED Viewed

Git LFS Details

SHA256: 1d704497947c07ac16534299451fc0526acddf286c2ab4ceb48161ff6facc2af
Pointer size: 132 Bytes
Size of remote file: 3.12 MB

examples/source_image/art_9.png ADDED Viewed

Git LFS Details

SHA256: 90f84739e2aa2388efaf0fac2b57a82df279b213a8dab9faa7af8ae7468b4e80
Pointer size: 132 Bytes
Size of remote file: 1.26 MB

examples/source_image/full3.png ADDED Viewed

examples/source_image/full4.jpeg ADDED Viewed

examples/source_image/full_body_1.png ADDED Viewed

examples/source_image/full_body_2.png ADDED Viewed

examples/source_image/happy.png ADDED Viewed

examples/source_image/happy1.png ADDED Viewed

examples/source_image/people_0.png ADDED Viewed

examples/source_image/sad.png ADDED Viewed

examples/source_image/sad1.png ADDED Viewed

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ ffmpeg
2	+ libsndfile1