ava-1 / persona.py
GowthamYarlagadda's picture
Upload 304 files
b36e9ec verified
from config import *
from speech import generate_speech
from image import generate_image
from lips import modify_lips
import humanize
import datetime as dt
from argparse import ArgumentParser
import shutil
import os
import glob
from improve import improve, vid2frames, restore_frames
from animate_face import animate_face
message = """Over the holiday season, capturing photos and videos of the festivities with family and friends
is an important activity for many. The iPhone has a suite of camera features that can significantly elevate
the quality and creativity of your holiday photos and videos."""
#message = """Apple today confirmed that it will be permanently closing its Infinite Loop retail store in
#Cupertino, California on January 20. Infinite Loop served as Apple's headquarters between the mid-1990s and
#2017, when its current Apple Park headquarters opened a few miles away."""
def main():
parser = ArgumentParser()
parser.add_argument("--improve", action="store_true", help="use Real ESRGAN to improve the video")
parser.add_argument("--skipgen", action="store_true", help="improve the video only")
parser.add_argument("--path_id", default=str(int(time.time())), help="set the path id to use")
parser.add_argument("--speech", default=audiofile, help="path to WAV speech file")
parser.add_argument("--image", default=imgfile, help="path to avatar file")
args = parser.parse_args()
tstart = time.time()
## SET PATH
path_id = args.path_id
path = os.path.join("temp", path_id)
print("path_id:", path_id, "path:", path)
os.makedirs(path, exist_ok=True)
outfile = os.path.join("results", path_id + "_small.mp4")
finalfile = os.path.join("results", path_id + "_large.mp4")
if not args.skipgen:
## GENERATE SPEECH
tspeech = "None"
if args.speech == audiofile:
print("-----------------------------------------")
print("generating speech")
t0 = time.time()
generate_speech(path_id, audiofile, "daniel", message, "ultra_fast")
tspeech = humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - t0)))
print("\ngenerating speech:", tspeech)
else:
print("using:", args.speech)
shutil.copyfile(args.speech, os.path.join("temp", path_id, audiofile))
## GENERATE AVATAR IMAGE
timage = "avatar.png"
shutil.copyfile(timage, os.path.join("temp", path_id, imgfile))
shutil.copyfile(args.image, os.path.join("temp", path_id, imgfile))
shutil.copyfile(args.image, os.path.join("temp", path_id, timage))
#if args.image == imgfile:
#print("-----------------------------------------")
#print("generating avatar image")
#t1 = time.time()
#avatar_description = "Middle-aged black man, Idris Elba, with short dark hair, serious look"
#generate_image(path_id, imgfile, f"hyperrealistic digital avatar, centered, {avatar_description}, \
# rim lighting, studio lighting, looking at the camera")
#timage = humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - t1)))
#print("\ngenerating avatar:", timage)
#else:
#shutil.copyfile(args.image, os.path.join("temp", path_id, imgfile))
## ANIMATE AVATAR IMAGE
print("-----------------------------------------")
print("animating face with driver")
t2 = time.time()
# audiofile determines the length of the driver movie to trim
# driver movie is imposed on the image file to produce the animated file
animate_face(path_id, audiofile, driverfile, imgfile, animatedfile)
tanimate = humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - t2)))
print("\nanimating face:", tanimate)
## MODIFY LIPS TO FIT THE SPEECH
print("-----------------------------------------")
print("modifying lips")
t3 = time.time()
os.makedirs("results", exist_ok=True)
modify_lips(path_id, audiofile, animatedfile, outfile)
tlips = humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - t3)))
print("\nmodifying lips:", tlips)
## IMPROVE THE OUTPUT VIDEO
if args.improve:
t4 = time.time()
print("-----------------------------------------")
print("converting video to frames")
shutil.rmtree(os.path.join(path, "improve"), ignore_errors=True)
os.makedirs(os.path.join(path, "improve", "disassembled"), exist_ok=True)
os.makedirs(os.path.join(path, "improve", "improved"), exist_ok=True)
vid2frames(outfile, os.path.join(path, "improve", "disassembled"))
print("-----------------------------------------")
print("improving face")
improve(os.path.join(path, "improve", "disassembled"), os.path.join(path, "improve", "improved"))
print("-----------------------------------------")
print("restoring frames")
restore_frames(os.path.join(path, audiofile), finalfile, os.path.join(path, "improve", "improved"))
timprove = humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - t4)))
print("\nimproving video:", timprove)
print("done")
print("Overall timing")
print("--------------")
if not args.skipgen:
print("generating speech:", tspeech)
print("generating avatar image:", timage)
print("animating face:", tanimate)
print("modifying lips:", tlips)
if args.improve:
print("improving finished video:", timprove)
print("total time:", humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - tstart))))
if __name__ == '__main__':
main()