diff --git a/app.py b/app.py
index 44ffb9b5bfe538cbbabacf93652e5aaa45be50f5..e2ad7c6463381f98b4379a41b7cccebb57614033 100644
--- a/app.py
+++ b/app.py
@@ -1,90 +1,88 @@
import os, sys
import tempfile
import gradio as gr
-from modules.text2speech import text2speech
-from modules.sadtalker_test import SadTalker
-
-def get_driven_audio(audio):
- if os.path.isfile(audio):
- return audio
- else:
- save_path = tempfile.NamedTemporaryFile(
- delete=False,
- suffix=("." + "wav"),
- )
- gen_audio = text2speech(audio, save_path.name)
- return gen_audio, gen_audio
+from src.gradio_demo import SadTalker
+from src.utils.text2speech import TTSTalker
def get_source_image(image):
return image
-def sadtalker_demo(result_dir='./tmp/'):
+
+
+def sadtalker_demo():
sad_talker = SadTalker()
+ tts_talker = TTSTalker()
+
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
- gr.Markdown("
😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023)
\
+ gr.Markdown("
😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023)
\
Arxiv \
Homepage \
-
Github ")
+
Github ")
- with gr.Row():
+ with gr.Row().style(equal_height=False):
with gr.Column(variant='panel'):
with gr.Tabs(elem_id="sadtalker_source_image"):
with gr.TabItem('Upload image'):
with gr.Row():
- source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256)
+ source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
with gr.Tabs(elem_id="sadtalker_driven_audio"):
- with gr.TabItem('Upload audio(wav/mp3 only currently)'):
+ with gr.TabItem('Upload OR TTS'):
with gr.Column(variant='panel'):
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
+
+ with gr.Column(variant='panel'):
+ input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can generate the audio from text using @Coqui.ai TTS.")
+ tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
+ tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])  # result of tts_talker.test is written into the driven_audio input component
+
with gr.Column(variant='panel'):
with gr.Tabs(elem_id="sadtalker_checkbox"):
with gr.TabItem('Settings'):
with gr.Column(variant='panel'):
- is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True)
- is_resize_mode = gr.Checkbox(label="Resize Mode (⚠️ Resize mode need manually crop the image firstly, can handle larger image crop)").style(container=True)
- is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True)
+ is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works on full body)")
+ enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
with gr.Tabs(elem_id="sadtalker_genearted"):
gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
- gen_text = gr.Textbox(visible=False)
-
+
with gr.Row():
examples = [
[
- 'examples/source_image/art_10.png',
- 'examples/driven_audio/deyu.wav',
+ 'examples/source_image/full_body_1.png',
+ 'examples/driven_audio/bus_chinese.wav',
True,
- False,
False
],
[
- 'examples/source_image/art_1.png',
- 'examples/driven_audio/fayu.wav',
+ 'examples/source_image/full_body_2.png',
+ 'examples/driven_audio/itosinger1.wav',
True,
+ False
+ ],
+ [
+ 'examples/source_image/art_13.png',
+ 'examples/driven_audio/fayu.wav',
True,
False
],
[
- 'examples/source_image/art_9.png',
- 'examples/driven_audio/itosinger1.wav',
+ 'examples/source_image/art_5.png',
+ 'examples/driven_audio/chinese_news.wav',
True,
- False,
- True
- ]
+ False
+ ],
]
gr.Examples(examples=examples,
inputs=[
source_image,
driven_audio,
is_still_mode,
- is_resize_mode,
- is_enhance_mode,
- gr.Textbox(value=result_dir, visible=False)],
- outputs=[gen_video, gen_text],
+ enhancer],
+ outputs=[gen_video],
fn=sad_talker.test,
cache_examples=os.getenv('SYSTEM') == 'spaces')
@@ -93,10 +91,8 @@ def sadtalker_demo(result_dir='./tmp/'):
inputs=[source_image,
driven_audio,
is_still_mode,
- is_resize_mode,
- is_enhance_mode,
- gr.Textbox(value=result_dir, visible=False)],
- outputs=[gen_video, gen_text]
+ enhancer],
+ outputs=[gen_video]
)
return sadtalker_interface
@@ -104,8 +100,7 @@ def sadtalker_demo(result_dir='./tmp/'):
if __name__ == "__main__":
- sadtalker_result_dir = os.path.join('./', 'results')
- demo = sadtalker_demo(sadtalker_result_dir)
+ demo = sadtalker_demo()
demo.launch()
diff --git a/examples/driven_audio/bus_chinese.wav b/examples/driven_audio/bus_chinese.wav
new file mode 100644
index 0000000000000000000000000000000000000000..888647738d72dfaee99b8d40bb0ddf6f7a1872e7
Binary files /dev/null and b/examples/driven_audio/bus_chinese.wav differ
diff --git a/examples/source_image/full_body_1.png b/examples/source_image/full_body_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..4fca65c949b7c7e7f7ed9459c473314a38be791f
Binary files /dev/null and b/examples/source_image/full_body_1.png differ
diff --git a/examples/source_image/full_body_2.png b/examples/source_image/full_body_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..b7bc6228cb2f4e8c01af8d2f52bbbf62540e2412
Binary files /dev/null and b/examples/source_image/full_body_2.png differ
diff --git a/examples/source_image/happy.png b/examples/source_image/happy.png
new file mode 100644
index 0000000000000000000000000000000000000000..9d194ba9a03dfda0867703d54ea6233819c46a73
Binary files /dev/null and b/examples/source_image/happy.png differ
diff --git a/examples/source_image/happy1.png b/examples/source_image/happy1.png
new file mode 100644
index 0000000000000000000000000000000000000000..b702974cca1a648ec70efee776e484284b527c90
Binary files /dev/null and b/examples/source_image/happy1.png differ
diff --git a/examples/source_image/people_0.png b/examples/source_image/people_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..8895eeb07a3e300b9bcfa3bb53e7a6a552182bc3
Binary files /dev/null and b/examples/source_image/people_0.png differ
diff --git a/examples/source_image/sad.png b/examples/source_image/sad.png
new file mode 100644
index 0000000000000000000000000000000000000000..6584467fdac971207883cdcd84b31da1dbc4dfa6
Binary files /dev/null and b/examples/source_image/sad.png differ
diff --git a/examples/source_image/sad1.png b/examples/source_image/sad1.png
new file mode 100644
index 0000000000000000000000000000000000000000..341e0cb70886995ecf72eebb4b8a4474ab7d287b
Binary files /dev/null and b/examples/source_image/sad1.png differ
diff --git a/modules/__pycache__/sadtalker_test.cpython-38.pyc b/modules/__pycache__/sadtalker_test.cpython-38.pyc
index c54ce9b8728a52636f9cb9f9c47616709d04cfe4..a96311c6eee958b442fec8776d088b74e7b8b3a2 100644
Binary files a/modules/__pycache__/sadtalker_test.cpython-38.pyc and b/modules/__pycache__/sadtalker_test.cpython-38.pyc differ
diff --git a/src/__pycache__/generate_batch.cpython-38.pyc b/src/__pycache__/generate_batch.cpython-38.pyc
index c68dd09e49933b52115307195bf3aa446d924922..dc3eb4726e9835d34c08362da995941fef530b8f 100644
Binary files a/src/__pycache__/generate_batch.cpython-38.pyc and b/src/__pycache__/generate_batch.cpython-38.pyc differ
diff --git a/src/__pycache__/generate_facerender_batch.cpython-38.pyc b/src/__pycache__/generate_facerender_batch.cpython-38.pyc
index 6a30615ed3eaa5902a2fa553ed3ed17a9ae92a51..cc944270498549b70e901f5b1c764d1d832eb49e 100644
Binary files a/src/__pycache__/generate_facerender_batch.cpython-38.pyc and b/src/__pycache__/generate_facerender_batch.cpython-38.pyc differ
diff --git a/src/__pycache__/test_audio2coeff.cpython-38.pyc b/src/__pycache__/test_audio2coeff.cpython-38.pyc
index c2553cc97f50096d7c7005ad39274a8653cb6ad4..a6d261868c02b57145618adcd583481cf623e391 100644
Binary files a/src/__pycache__/test_audio2coeff.cpython-38.pyc and b/src/__pycache__/test_audio2coeff.cpython-38.pyc differ
diff --git a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc b/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc
index 460563d74a990c40a3c5bd6f3209acca6d86b550..de88551314f6c19ad1f5b5b33704f1303f51e029 100644
Binary files a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc and b/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc differ
diff --git a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc b/src/audio2exp_models/__pycache__/networks.cpython-38.pyc
index 766660615f22f94c740dd420ccef83ed442c4fac..d703bd9e8f3d0c27c16fa713bba3d0969e984ad3 100644
Binary files a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc and b/src/audio2exp_models/__pycache__/networks.cpython-38.pyc differ
diff --git a/src/audio2exp_models/audio2exp.py b/src/audio2exp_models/audio2exp.py
index 5f6e6b77b0ceb2089539caa440f7106c7b1e8aa2..9e79a929560592687a505e13188796e2b0ca8772 100644
--- a/src/audio2exp_models/audio2exp.py
+++ b/src/audio2exp_models/audio2exp.py
@@ -22,7 +22,8 @@ class Audio2Exp(nn.Module):
current_mel_input = mel_input[:,i:i+10]
- ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1)) #bs T 64
+ #ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1)) #bs T 64
+ ref = batch['ref'][:, :, :64][:, i:i+10]
ratio = batch['ratio_gt'][:, i:i+10] #bs T
audiox = current_mel_input.view(-1, 1, 80, 16) # bs*T 1 80 16
diff --git a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc b/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc
index 20fa93168344012f0bdb77727b5b5669fac8a10b..5b2dcc996a73224e972148e252fb4e2deedd69a5 100644
Binary files a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc differ
diff --git a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc b/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc
index 97d9bdf072c5bd356cc312357646c6eae2b798d0..b0f11a59fea18ee93c30da5cd4c94d04897ea010 100644
Binary files a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc differ
diff --git a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc b/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc
index 0d9aaee3ad4caa8afc40f723d224eb5b25e8afcd..1aa0e494be950e6ca972390b27f2dddc8be6d193 100644
Binary files a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc and b/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc differ
diff --git a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc b/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc
index c7ebfcd0dd3538cedeb7eba984f94d9763b392c6..817b8836123ed1a3b5795d912d84c3ff54d7accc 100644
Binary files a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc and b/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc differ
diff --git a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc b/src/audio2pose_models/__pycache__/networks.cpython-38.pyc
index 239626089b91321b1c00cfba2dfe0a3ba1ccb0b9..d18f56064377373a8f4f400c59379b0b79d9f649 100644
Binary files a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc and b/src/audio2pose_models/__pycache__/networks.cpython-38.pyc differ
diff --git a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc b/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc
index 0e6b40591fd932ddb2cf686b72afd08c90de1a44..5aa2863a646a6eb8b44e0ebdebc5c21b562c2f39 100644
Binary files a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc and b/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc differ
diff --git a/src/audio2pose_models/audio2pose.py b/src/audio2pose_models/audio2pose.py
index 3a37179e221340662a817628df3d01ae9e34404f..1a8410d6ee7f7f1d50305f61332bfbdb9dc8bf0e 100644
--- a/src/audio2pose_models/audio2pose.py
+++ b/src/audio2pose_models/audio2pose.py
@@ -12,7 +12,7 @@ class Audio2Pose(nn.Module):
self.latent_dim = cfg.MODEL.CVAE.LATENT_SIZE
self.device = device
- self.audio_encoder = AudioEncoder(wav2lip_checkpoint)
+ self.audio_encoder = AudioEncoder(wav2lip_checkpoint, device)
self.audio_encoder.eval()
for param in self.audio_encoder.parameters():
param.requires_grad = False
@@ -20,10 +20,6 @@ class Audio2Pose(nn.Module):
self.netG = CVAE(cfg)
self.netD_motion = PoseSequenceDiscriminator(cfg)
- self.gan_criterion = nn.MSELoss()
- self.reg_criterion = nn.L1Loss(reduction='none')
- self.pair_criterion = nn.PairwiseDistance()
- self.cosine_loss = nn.CosineSimilarity(dim=1)
def forward(self, x):
@@ -81,6 +77,10 @@ class Audio2Pose(nn.Module):
z = torch.randn(bs, self.latent_dim).to(ref.device)
batch['z'] = z
audio_emb = self.audio_encoder(indiv_mels_use[:, -1*self.seq_len:,:,:,:]) #bs seq_len 512
+ if audio_emb.shape[1] != self.seq_len:  # encoder returned a different number of steps than seq_len
+ pad_dim = self.seq_len-audio_emb.shape[1]  # number of missing steps along dim 1
+ pad_audio_emb = audio_emb[:, :1].repeat(1, pad_dim, 1)  # repeat the first step's embedding pad_dim times
+ audio_emb = torch.cat([pad_audio_emb, audio_emb], 1)  # left-pad along dim 1 so the length becomes seq_len
batch['audio_emb'] = audio_emb
batch = self.netG.test(batch)
pose_motion_pred_list.append(batch['pose_motion_pred'][:,-1*re:,:])
diff --git a/src/audio2pose_models/audio_encoder.py b/src/audio2pose_models/audio_encoder.py
index 0ce036df119f86ef28c3ac8d6c834264571c309a..ea9095ad762caf48ff0f97abf4a086f6f7fee7e7 100644
--- a/src/audio2pose_models/audio_encoder.py
+++ b/src/audio2pose_models/audio_encoder.py
@@ -19,7 +19,7 @@ class Conv2d(nn.Module):
return self.act(out)
class AudioEncoder(nn.Module):
- def __init__(self, wav2lip_checkpoint):
+ def __init__(self, wav2lip_checkpoint, device):
super(AudioEncoder, self).__init__()
self.audio_encoder = nn.Sequential(
@@ -41,8 +41,8 @@ class AudioEncoder(nn.Module):
Conv2d(256, 512, kernel_size=3, stride=1, padding=0),
Conv2d(512, 512, kernel_size=1, stride=1, padding=0),)
- #### load the pre-trained audio_encoder\
- wav2lip_state_dict = torch.load(wav2lip_checkpoint)['state_dict']
+ #### load the pre-trained audio_encoder
+ wav2lip_state_dict = torch.load(wav2lip_checkpoint, map_location=torch.device(device))['state_dict']
state_dict = self.audio_encoder.state_dict()
for k,v in wav2lip_state_dict.items():
diff --git a/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc b/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc
index 0469c877400338fae921f4aedf1159b03abbb101..25b9b1377b35ea7231f4d3b44d81aab8d44f4b5b 100644
Binary files a/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc and b/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc differ
diff --git a/src/face3d/__pycache__/visualize.cpython-38.pyc b/src/face3d/__pycache__/visualize.cpython-38.pyc
deleted file mode 100644
index a666447a57777ba5a4c6ed6642f234b79c45d372..0000000000000000000000000000000000000000
Binary files a/src/face3d/__pycache__/visualize.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/__pycache__/__init__.cpython-38.pyc b/src/face3d/models/__pycache__/__init__.cpython-38.pyc
index 886f0b184346c5530d0bf8d6f4b2300079511225..023f4afb376ad418cc6e3cdd9e821cfa0bcd33f3 100644
Binary files a/src/face3d/models/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/__pycache__/__init__.cpython-38.pyc differ
diff --git a/src/face3d/models/__pycache__/base_model.cpython-38.pyc b/src/face3d/models/__pycache__/base_model.cpython-38.pyc
index e42691ec8e26c5c38baf6bd0172dff8110754da1..1076d15ca87eb8922a4fb3706a3aff777187b612 100644
Binary files a/src/face3d/models/__pycache__/base_model.cpython-38.pyc and b/src/face3d/models/__pycache__/base_model.cpython-38.pyc differ
diff --git a/src/face3d/models/__pycache__/bfm.cpython-38.pyc b/src/face3d/models/__pycache__/bfm.cpython-38.pyc
deleted file mode 100644
index 088a48bf9f0cabeb667c11c21000f0254c63ec81..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/__pycache__/bfm.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc b/src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc
deleted file mode 100644
index 3e8de7975dee1099cb3e7698227df4e4062f86ee..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/__pycache__/facerecon_model.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/__pycache__/losses.cpython-38.pyc b/src/face3d/models/__pycache__/losses.cpython-38.pyc
deleted file mode 100644
index ffbf94d1f1e09d5ba0653c588b0cfaeb3df7b920..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/__pycache__/losses.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/models/__pycache__/networks.cpython-38.pyc b/src/face3d/models/__pycache__/networks.cpython-38.pyc
index 1a97b5cd3309786e87448c4478ae2d19a18e096b..e52b5dac3ce0e017ed844aed711ddfb94223be98 100644
Binary files a/src/face3d/models/__pycache__/networks.cpython-38.pyc and b/src/face3d/models/__pycache__/networks.cpython-38.pyc differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index c49397797cf06eaa01ef1327d25f0c145a511994..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 82f8ed2b49d5c718fe15c47d620156600f776765..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc
index 83f6ad3ed4af3cc3d3cfa9067e345cdffb058638..a891077dd80e455e762875f37b16ff11e58441e7 100644
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index b1291676de1f08eaba633f000d015eab672e0036..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc
deleted file mode 100644
index 6be617e2ecf266f566e6e5d4972465fcd0379ac5..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-36.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc
deleted file mode 100644
index 0a085d7cb2aa24dabc85966931e3aa9db54310e3..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-37.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc
index f59247d26d9210b5fd2960df842753a903a90b3d..e7d3278234555217f1055e02d930d1cd8731afa1 100644
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc
deleted file mode 100644
index d8a633135905cc3c5fe7673c6d6ab584e0692ce7..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-39.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc
deleted file mode 100644
index 6d9748f002ee2f953efa2391054329b6d32f9016..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-36.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc
deleted file mode 100644
index 50b9f06989f4ca4f6f5bd7a1fdf1952f2035e974..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-37.pyc and /dev/null differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc
index d8edc64d28aa3e3fb8c26ba795d04a8ef35b1540..db57e8b41e4fe5bdbee04db62986c15c0e4bffb1 100644
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc differ
diff --git a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc
deleted file mode 100644
index 24ebbc749bfa90340e389e2c88bd1f8218c3e338..0000000000000000000000000000000000000000
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-39.pyc and /dev/null differ
diff --git a/src/face3d/util/__pycache__/__init__.cpython-38.pyc b/src/face3d/util/__pycache__/__init__.cpython-38.pyc
index 22771f3169f2da9a37c1bd619a0e5d05003492b9..2671705d02bed0a099b4a375070d0949c1450b7b 100644
Binary files a/src/face3d/util/__pycache__/__init__.cpython-38.pyc and b/src/face3d/util/__pycache__/__init__.cpython-38.pyc differ
diff --git a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc b/src/face3d/util/__pycache__/load_mats.cpython-38.pyc
index 8a48b59ca078ef709825d54c069f518c15103c4e..f44224c0f7c12afc3590f10b9f5ac570b6b668bb 100644
Binary files a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc and b/src/face3d/util/__pycache__/load_mats.cpython-38.pyc differ
diff --git a/src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc b/src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc
deleted file mode 100644
index 0ac5cc3eb7c6fd3141005a9cd53f604c49036717..0000000000000000000000000000000000000000
Binary files a/src/face3d/util/__pycache__/nvdiffrast.cpython-38.pyc and /dev/null differ
diff --git a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc b/src/face3d/util/__pycache__/preprocess.cpython-38.pyc
index 7900dafbd8b74629c391eb8972f615650d4461df..90eb37261ae38ab925f149db62d91a1d0078bfcf 100644
Binary files a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc and b/src/face3d/util/__pycache__/preprocess.cpython-38.pyc differ
diff --git a/src/face3d/util/__pycache__/util.cpython-38.pyc b/src/face3d/util/__pycache__/util.cpython-38.pyc
deleted file mode 100644
index 56d6f9217276ff22306a567df4861f802e61a82a..0000000000000000000000000000000000000000
Binary files a/src/face3d/util/__pycache__/util.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/__pycache__/animate.cpython-38.pyc b/src/facerender/__pycache__/animate.cpython-38.pyc
index 11fb3d0ee467093c0cb318003c52eb4c78f11cc9..1f8003ddb550fc6e235abccfb5f8481ee8c16afa 100644
Binary files a/src/facerender/__pycache__/animate.cpython-38.pyc and b/src/facerender/__pycache__/animate.cpython-38.pyc differ
diff --git a/src/facerender/animate.py b/src/facerender/animate.py
index be2d62ebaeffe06a8dee1e268d832690b1937320..1bd221ad4c99d911222fdf1eb087ebb626afc867 100644
--- a/src/facerender/animate.py
+++ b/src/facerender/animate.py
@@ -16,6 +16,8 @@ from src.facerender.modules.make_animation import make_animation
from pydub import AudioSegment
from src.utils.face_enhancer import enhancer as face_enhancer
+from src.utils.paste_pic import paste_pic
+
class AnimateFromCoeff():
@@ -30,21 +32,26 @@ class AnimateFromCoeff():
**config['model_params']['common_params'])
kp_extractor = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
+ he_estimator = HEEstimator(**config['model_params']['he_estimator_params'],
+ **config['model_params']['common_params'])
mapping = MappingNet(**config['model_params']['mapping_params'])
generator.to(device)
kp_extractor.to(device)
+ he_estimator.to(device)
mapping.to(device)
for param in generator.parameters():
param.requires_grad = False
for param in kp_extractor.parameters():
param.requires_grad = False
+ for param in he_estimator.parameters():
+ param.requires_grad = False
for param in mapping.parameters():
param.requires_grad = False
if free_view_checkpoint is not None:
- self.load_cpk_facevid2vid(free_view_checkpoint, kp_detector=kp_extractor, generator=generator)
+ self.load_cpk_facevid2vid(free_view_checkpoint, kp_detector=kp_extractor, generator=generator, he_estimator=he_estimator)
else:
raise AttributeError("Checkpoint should be specified for video head pose estimator.")
@@ -55,10 +62,12 @@ class AnimateFromCoeff():
self.kp_extractor = kp_extractor
self.generator = generator
+ self.he_estimator = he_estimator
self.mapping = mapping
self.kp_extractor.eval()
self.generator.eval()
+ self.he_estimator.eval()
self.mapping.eval()
self.device = device
@@ -107,26 +116,35 @@ class AnimateFromCoeff():
return checkpoint['epoch']
- def generate(self, x, video_save_dir, enhancer=None, original_size=None):
+ def generate(self, x, video_save_dir, pic_path, crop_info, enhancer=None, full_img_enhancer=None):
source_image=x['source_image'].type(torch.FloatTensor)
source_semantics=x['source_semantics'].type(torch.FloatTensor)
- target_semantics=x['target_semantics_list'].type(torch.FloatTensor)
- yaw_c_seq = x['yaw_c_seq'].type(torch.FloatTensor)
- pitch_c_seq = x['pitch_c_seq'].type(torch.FloatTensor)
- roll_c_seq = x['roll_c_seq'].type(torch.FloatTensor)
+ target_semantics=x['target_semantics_list'].type(torch.FloatTensor)
source_image=source_image.to(self.device)
source_semantics=source_semantics.to(self.device)
target_semantics=target_semantics.to(self.device)
- yaw_c_seq = x['yaw_c_seq'].to(self.device)
- pitch_c_seq = x['pitch_c_seq'].to(self.device)
- roll_c_seq = x['roll_c_seq'].to(self.device)
+ if 'yaw_c_seq' in x:  # yaw input is optional; None is passed on when the key is absent
+ yaw_c_seq = x['yaw_c_seq'].type(torch.FloatTensor)
+ yaw_c_seq = yaw_c_seq.to(self.device)  # move the cast tensor; re-reading x[...] here would discard the float cast
+ else:
+ yaw_c_seq = None
+ if 'pitch_c_seq' in x:  # pitch input is optional, handled like yaw
+ pitch_c_seq = x['pitch_c_seq'].type(torch.FloatTensor)
+ pitch_c_seq = pitch_c_seq.to(self.device)  # keep the float cast when moving to the device
+ else:
+ pitch_c_seq = None
+ if 'roll_c_seq' in x:  # roll input is optional, handled like yaw
+ roll_c_seq = x['roll_c_seq'].type(torch.FloatTensor)
+ roll_c_seq = roll_c_seq.to(self.device)  # keep the float cast when moving to the device
+ else:
+ roll_c_seq = None
frame_num = x['frame_num']
predictions_video = make_animation(source_image, source_semantics, target_semantics,
- self.generator, self.kp_extractor, self.mapping,
- yaw_c_seq, pitch_c_seq, roll_c_seq, use_exp = True,)
+ self.generator, self.kp_extractor, self.he_estimator, self.mapping,
+ yaw_c_seq, pitch_c_seq, roll_c_seq, use_exp = True)
predictions_video = predictions_video.reshape((-1,)+predictions_video.shape[2:])
predictions_video = predictions_video[:frame_num]
@@ -139,6 +157,7 @@ class AnimateFromCoeff():
result = img_as_ubyte(video)
### the generated video is 256x256, so we keep the aspect ratio,
+ original_size = crop_info[0]
if original_size:
result = [ cv2.resize(result_i,(256, int(256.0 * original_size[1]/original_size[0]) )) for result_i in result ]
@@ -157,7 +176,9 @@ class AnimateFromCoeff():
imageio.mimsave(enhanced_path, enhanced_images, fps=float(25))
- av_path = os.path.join(video_save_dir, video_name)
+ av_path = os.path.join(video_save_dir, video_name)
+ return_path = av_path
+
audio_path = x['audio_path']
audio_name = os.path.splitext(os.path.split(audio_path)[-1])[0]
new_audio_path = os.path.join(video_save_dir, audio_name+'.wav')
@@ -171,12 +192,28 @@ class AnimateFromCoeff():
cmd = r'ffmpeg -y -i "%s" -i "%s" -vcodec copy "%s"' % (path, new_audio_path, av_path)
os.system(cmd)
+ print(f'The generated video is named {video_name} in {video_save_dir}')
if enhancer:
+ return_path = av_path_enhancer
cmd = r'ffmpeg -y -i "%s" -i "%s" -vcodec copy "%s"' % (enhanced_path, new_audio_path, av_path_enhancer)
os.system(cmd)
os.remove(enhanced_path)
+ print(f'The generated video is named {video_name_enhancer} in {video_save_dir}')
+
+ if len(crop_info) == 3:
+ video_name_full = x['video_name'] + '_full.mp4'
+ full_video_path = os.path.join(video_save_dir, video_name_full)
+ return_path = full_video_path
+ if enhancer:
+ paste_pic(av_path_enhancer, pic_path, crop_info, new_audio_path, full_video_path)
+ else:
+ paste_pic(path, pic_path, crop_info, new_audio_path, full_video_path)
+ print(f'The generated video is named {video_name_full} in {video_save_dir}')
+
os.remove(path)
os.remove(new_audio_path)
+ return return_path
+
diff --git a/src/facerender/modules/__pycache__/animate_model.cpython-38.pyc b/src/facerender/modules/__pycache__/animate_model.cpython-38.pyc
deleted file mode 100644
index 1ecb83e033911eb82d582e097c513ea0fd4cb69a..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/animate_model.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/animate_model.cpython-39.pyc b/src/facerender/modules/__pycache__/animate_model.cpython-39.pyc
deleted file mode 100644
index 8e9a594ddff05d41ed7fea66e42b37558869332a..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/animate_model.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc b/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc
index 5178c3763bc9f6fcff3a8a410deff7d3c30060db..7558dbc6512fceb2147fd1fae031212d07e4449d 100644
Binary files a/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc and b/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc differ
diff --git a/src/facerender/modules/__pycache__/dense_motion.cpython-39.pyc b/src/facerender/modules/__pycache__/dense_motion.cpython-39.pyc
deleted file mode 100644
index 9a6cec5db6525ef350d0fcd52efe814b0d3f1e6d..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/dense_motion.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/generator.cpython-38.pyc b/src/facerender/modules/__pycache__/generator.cpython-38.pyc
index 8d132f05d36e505f21c864d4c95931472ba58051..11aa36c10f79820e84d8a275234b85b0371cc050 100644
Binary files a/src/facerender/modules/__pycache__/generator.cpython-38.pyc and b/src/facerender/modules/__pycache__/generator.cpython-38.pyc differ
diff --git a/src/facerender/modules/__pycache__/generator.cpython-39.pyc b/src/facerender/modules/__pycache__/generator.cpython-39.pyc
deleted file mode 100644
index ac9587fe99d8905d8ac99d60025ed1a8d5bacf1b..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/generator.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc b/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc
index ccc5d4543365bfc022a06a72d6ed9d388249279a..e0bd1dcd3e98a316628449370f08dc8bd2dde4b9 100644
Binary files a/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc and b/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc differ
diff --git a/src/facerender/modules/__pycache__/keypoint_detector.cpython-39.pyc b/src/facerender/modules/__pycache__/keypoint_detector.cpython-39.pyc
deleted file mode 100644
index e609a2ce2bea049dcc08e711684347032da88e1a..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/keypoint_detector.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc b/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc
index 1b54bcc293d742f70db165849b9764666b0f9a8b..76e338a936f0354c81abaa5fc677c5622db16eb3 100644
Binary files a/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc and b/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc differ
diff --git a/src/facerender/modules/__pycache__/mapping.cpython-38.pyc b/src/facerender/modules/__pycache__/mapping.cpython-38.pyc
index 7e1a2baa2bfab28fe7e3904f94a644633124b56c..b464c917a4d3feb94fa629b3390c000af89ceb9a 100644
Binary files a/src/facerender/modules/__pycache__/mapping.cpython-38.pyc and b/src/facerender/modules/__pycache__/mapping.cpython-38.pyc differ
diff --git a/src/facerender/modules/__pycache__/mapping5.cpython-38.pyc b/src/facerender/modules/__pycache__/mapping5.cpython-38.pyc
deleted file mode 100644
index ae35fb77f8552d2aa9cb263cba6ca9d37bbee9a7..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/mapping5.cpython-38.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/mapping5.cpython-39.pyc b/src/facerender/modules/__pycache__/mapping5.cpython-39.pyc
deleted file mode 100644
index fa6b6db40007f95fca648909a638810273b2c050..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/mapping5.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/modules/__pycache__/util.cpython-38.pyc b/src/facerender/modules/__pycache__/util.cpython-38.pyc
index 1e1c92955be38c880c52cc70b8051fd8ef4fa63a..4f4d1a6d0e3797390e942821e1e2c238e1c8a8d2 100644
Binary files a/src/facerender/modules/__pycache__/util.cpython-38.pyc and b/src/facerender/modules/__pycache__/util.cpython-38.pyc differ
diff --git a/src/facerender/modules/__pycache__/util.cpython-39.pyc b/src/facerender/modules/__pycache__/util.cpython-39.pyc
deleted file mode 100644
index 8764b93cb4e5964b831caf9ff376b70105f3dc5d..0000000000000000000000000000000000000000
Binary files a/src/facerender/modules/__pycache__/util.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/modules/dense_motion.py b/src/facerender/modules/dense_motion.py
index 30c13060be8e82979771514b4ec51e5de23f49fa..a286ead2e84ed1961335d34a3b50ab38f25e4495 100644
--- a/src/facerender/modules/dense_motion.py
+++ b/src/facerender/modules/dense_motion.py
@@ -102,6 +102,10 @@ class DenseMotionNetwork(nn.Module):
mask = F.softmax(mask, dim=1)
out_dict['mask'] = mask
mask = mask.unsqueeze(2) # (bs, num_kp+1, 1, d, h, w)
+
+ zeros_mask = torch.zeros_like(mask)
+ mask = torch.where(mask < 1e-3, zeros_mask, mask)
+
sparse_motion = sparse_motion.permute(0, 1, 5, 2, 3, 4) # (bs, num_kp+1, 3, d, h, w)
deformation = (sparse_motion * mask).sum(dim=1) # (bs, 3, d, h, w)
deformation = deformation.permute(0, 2, 3, 4, 1) # (bs, d, h, w, 3)
diff --git a/src/facerender/modules/make_animation.py b/src/facerender/modules/make_animation.py
index 2b2382d82d26043145184b339103aac64abdaa62..e7887a3fed50d294948dd0a7d4c4956583b5f705 100644
--- a/src/facerender/modules/make_animation.py
+++ b/src/facerender/modules/make_animation.py
@@ -62,29 +62,33 @@ def get_rotation_matrix(yaw, pitch, roll):
return rot_mat
-def keypoint_transformation(kp_canonical, he):
+def keypoint_transformation(kp_canonical, he, wo_exp=False):
kp = kp_canonical['value'] # (bs, k, 3)
yaw, pitch, roll= he['yaw'], he['pitch'], he['roll']
yaw = headpose_pred_to_degree(yaw)
pitch = headpose_pred_to_degree(pitch)
roll = headpose_pred_to_degree(roll)
- if 'yaw_c' in he:
- yaw = yaw + he['yaw_c']
- if 'pitch_c' in he:
- pitch = pitch + he['pitch_c']
- if 'roll_c' in he:
- roll = roll + he['roll_c']
+ if 'yaw_in' in he:
+ yaw = he['yaw_in']
+ if 'pitch_in' in he:
+ pitch = he['pitch_in']
+ if 'roll_in' in he:
+ roll = he['roll_in']
rot_mat = get_rotation_matrix(yaw, pitch, roll) # (bs, 3, 3)
t, exp = he['t'], he['exp']
+ if wo_exp:
+ exp = exp*0
# keypoint rotation
kp_rotated = torch.einsum('bmp,bkp->bkm', rot_mat, kp)
# keypoint translation
- t = t.unsqueeze_(1).repeat(1, kp.shape[1], 1)
+ t[:, 0] = t[:, 0]*0
+ t[:, 2] = t[:, 2]*0
+ t = t.unsqueeze(1).repeat(1, kp.shape[1], 1)
kp_t = kp_rotated + t
# add expression deviation
@@ -96,7 +100,7 @@ def keypoint_transformation(kp_canonical, he):
def make_animation(source_image, source_semantics, target_semantics,
- generator, kp_detector, mapping,
+ generator, kp_detector, he_estimator, mapping,
yaw_c_seq=None, pitch_c_seq=None, roll_c_seq=None,
use_exp=True):
with torch.no_grad():
@@ -109,14 +113,12 @@ def make_animation(source_image, source_semantics, target_semantics,
for frame_idx in tqdm(range(target_semantics.shape[1]), 'Face Renderer:'):
target_semantics_frame = target_semantics[:, frame_idx]
he_driving = mapping(target_semantics_frame)
- if not use_exp:
- he_driving['exp'] = he_driving['exp']*0
if yaw_c_seq is not None:
- he_driving['yaw_c'] = yaw_c_seq[:, frame_idx]
+ he_driving['yaw_in'] = yaw_c_seq[:, frame_idx]
if pitch_c_seq is not None:
- he_driving['pitch_c'] = pitch_c_seq[:, frame_idx]
+ he_driving['pitch_in'] = pitch_c_seq[:, frame_idx]
if roll_c_seq is not None:
- he_driving['roll_c'] = roll_c_seq[:, frame_idx]
+ he_driving['roll_in'] = roll_c_seq[:, frame_idx]
kp_driving = keypoint_transformation(kp_canonical, he_driving)
@@ -124,6 +126,14 @@ def make_animation(source_image, source_semantics, target_semantics,
#kp_driving_initial=kp_driving_initial)
kp_norm = kp_driving
out = generator(source_image, kp_source=kp_source, kp_driving=kp_norm)
+ '''
+ source_image_new = out['prediction'].squeeze(1)
+ kp_canonical_new = kp_detector(source_image_new)
+ he_source_new = he_estimator(source_image_new)
+ kp_source_new = keypoint_transformation(kp_canonical_new, he_source_new, wo_exp=True)
+ kp_driving_new = keypoint_transformation(kp_canonical_new, he_driving, wo_exp=True)
+ out = generator(source_image_new, kp_source=kp_source_new, kp_driving=kp_driving_new)
+ '''
predictions.append(out['prediction'])
predictions_ts = torch.stack(predictions, dim=1)
return predictions_ts
diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 8327a281a1c119814499648bdec814cf753ba0ba..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 4e9c9671abd49037eb51d66e7bb6046177433a27..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc
index 03d5fdb5ff0e14c08894b394b8c1cae7e1f324c4..a08f1284e68bb6251119739bc46a2dab9f5a171b 100644
Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index 9c0d18c3cec16bbeccbc825186b14c60550563a1..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc
deleted file mode 100644
index 24a89a661e425c0b49c5d616759928e701eab005..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-36.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-37.pyc
deleted file mode 100644
index d7658dccf719cd85ac0c6e6f6b190ffe6f32c5ed..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-37.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc
index 20a4560fc425087d5d63c70cc08fd12c2d8a7ea1..f1a96eace36b537e5cfc85be1be94616151aca85 100644
Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-39.pyc
deleted file mode 100644
index d1c07e4d0f03cd52a105f009d16f079559a5f97e..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-36.pyc
deleted file mode 100644
index 7602415a703e1bd2b6008a9bf6dde9778d4349ae..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-36.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-37.pyc
deleted file mode 100644
index 1ce98838a834f854dbbc7a8d2f4f1295802e97f3..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-37.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc
index eb7252b8ad1b6aec2f5566979db0494f71a63d91..e6578b03a7060d9b9b31681e6f7ef27e4251f52e 100644
Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-39.pyc
deleted file mode 100644
index b84f093a8aef9c2b92f0beead2318296163c9e1f..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-39.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-36.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-36.pyc
deleted file mode 100644
index 4a53e2cdf5b5c2d0f7fc9f6c928fe116d629a6c8..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-36.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-37.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-37.pyc
deleted file mode 100644
index b91c03d671fb5a9334bd4791f6e1f55d397f2e62..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-37.pyc and /dev/null differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc
index 30c9811579d75333db1b60fe4622f682013f719b..90f775d27997dc8659edde9eb763d0f8b4007ace 100644
Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc differ
diff --git a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-39.pyc b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-39.pyc
deleted file mode 100644
index 561b184da4d393c548f7eb0b3076c765d4bf3745..0000000000000000000000000000000000000000
Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-39.pyc and /dev/null differ
diff --git a/src/generate_batch.py b/src/generate_batch.py
index 2d9e19b6aa4c19c13caf0a208e1189cd6c19f796..8bf580e49427527bfd1c2ff533de45ee91e3872e 100644
--- a/src/generate_batch.py
+++ b/src/generate_batch.py
@@ -48,7 +48,7 @@ def generate_blink_seq_randomly(num_frames):
break
return ratio
-def get_data(first_coeff_path, audio_path, device):
+def get_data(first_coeff_path, audio_path, device, ref_eyeblink_coeff_path):
syncnet_mel_step_size = 16
fps = 25
@@ -56,10 +56,6 @@ def get_data(first_coeff_path, audio_path, device):
pic_name = os.path.splitext(os.path.split(first_coeff_path)[-1])[0]
audio_name = os.path.splitext(os.path.split(audio_path)[-1])[0]
- source_semantics_path = first_coeff_path
- source_semantics_dict = scio.loadmat(source_semantics_path)
- ref_coeff = source_semantics_dict['coeff_3dmm'][:1,:70] #1 70
-
wav = audio.load_wav(audio_path, 16000)
wav_length, num_frames = parse_audio_length(len(wav), 16000, 25)
wav = crop_pad_audio(wav, wav_length)
@@ -76,7 +72,27 @@ def get_data(first_coeff_path, audio_path, device):
m = spec[seq, :]
indiv_mels.append(m.T)
indiv_mels = np.asarray(indiv_mels) # T 80 16
+
ratio = generate_blink_seq_randomly(num_frames) # T
+ source_semantics_path = first_coeff_path
+ source_semantics_dict = scio.loadmat(source_semantics_path)
+ ref_coeff = source_semantics_dict['coeff_3dmm'][:1,:70] #1 70
+ ref_coeff = np.repeat(ref_coeff, num_frames, axis=0)
+
+ if ref_eyeblink_coeff_path is not None:
+ ratio[:num_frames] = 0
+ refeyeblink_coeff_dict = scio.loadmat(ref_eyeblink_coeff_path)
+ refeyeblink_coeff = refeyeblink_coeff_dict['coeff_3dmm'][:,:64]
+ refeyeblink_num_frames = refeyeblink_coeff.shape[0]
+ if refeyeblink_num_frames None:
+ model_name = TTS.list_models()[0]
+ self.tts = TTS(model_name)
+
+    def test(self, text, language='en'):
+        """Synthesize `text` to a temporary .wav file and return its path (the caller deletes it)."""
+        # Close our own handle right away: only the path is handed to the TTS engine.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tempf:
+            wav_path = tempf.name
+
+        self.tts.tts_to_file(text, speaker=self.tts.speakers[0],
+                             language=language, file_path=wav_path)
+
+        return wav_path
\ No newline at end of file