hallo / app.py
saicharan1234's picture
Update app.py
69dd2a2 verified
raw
history blame
2.5 kB
import os
import shutil
from huggingface_hub import snapshot_download
import gradio as gr
os.chdir(os.path.dirname(os.path.abspath(__file__)))
from scripts.inference import inference_process
import argparse
import uuid
# Hugging Face Spaces expose the running Space's id through SPACE_ID. The
# variable is absent when the app runs anywhere else (e.g. locally), so read it
# with a default: indexing os.environ directly would raise KeyError at import.
is_shared_ui = "fudan-generative-ai/hallo" in os.environ.get("SPACE_ID", "")

# run_inference refuses to work on the shared Space, so the pretrained weights
# are only fetched for duplicated or local instances.
if not is_shared_ui:
    hallo_dir = snapshot_download(
        repo_id="fudan-generative-ai/hallo",
        local_dir="pretrained_models",
    )
def run_inference(
    source_image,
    driving_audio,
    pose_weight,
    face_weight,
    lip_weight,
    face_expand_ratio,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the inference pipeline for one image/audio pair and return the video path.

    Args:
        source_image: Filepath of the face image (the UI component uses
            ``type="filepath"``); ``None`` when nothing was uploaded.
        driving_audio: Filepath of the driving audio; ``None`` when nothing
            was uploaded.
        pose_weight: Forwarded to ``inference_process`` as ``args.pose_weight``.
        face_weight: Forwarded to ``inference_process`` as ``args.face_weight``.
        lip_weight: Forwarded to ``inference_process`` as ``args.lip_weight``.
        face_expand_ratio: Forwarded as ``args.face_expand_ratio``.
        progress: Gradio progress tracker. It is not used directly in the
            body; declaring it with ``track_tqdm=True`` asks Gradio to surface
            tqdm progress in the UI.

    Returns:
        The relative path ``output-<uuid>.mp4`` that was handed to
        ``inference_process`` as ``args.output``.

    Raises:
        gr.Error: If running on the shared Space, or if the image or audio
            input is missing.
    """
    if is_shared_ui:
        raise gr.Error("This Space only works in duplicated instances")

    # Fail early with a readable message instead of letting a None path
    # propagate into the inference pipeline.
    if not source_image or not driving_audio:
        raise gr.Error("Please provide both a face image and a driving audio file")

    # A random UUID keeps outputs of different requests from overwriting
    # each other.
    output_path = f'output-{uuid.uuid4()}.mp4'

    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=pose_weight,
        face_weight=face_weight,
        lip_weight=lip_weight,
        face_expand_ratio=face_expand_ratio,
        checkpoint=None,
    )
    inference_process(args)
    return output_path
# Gradio UI: face image and driving audio inputs next to the generated video,
# followed by the tuning sliders and the button that triggers run_inference.
# NOTE(review): the accordion and button are placed below the input/output row;
# confirm this matches the intended layout.
with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8') as demo:
    # The Markdown text is kept flush-left so the rendered string is unchanged.
    gr.Markdown(
        """
# Talking Head Generation
Upload a face image and driving audio, and adjust the weights to generate a talking head video.
"""
    )

    with gr.Row():
        with gr.Column():
            avatar_face = gr.Image(type="filepath", label="Face", elem_id="face-input")
            driving_audio = gr.Audio(type="filepath", label="Driving Audio", elem_id="audio-input")
        with gr.Column():
            output_video = gr.Video(label="Your Talking Head", elem_id="output-video")

    with gr.Accordion("Advanced Settings", open=False):
        # An explicit step is required: without it Gradio derives the step from
        # the default 0-100 range, which yields whole numbers only and makes
        # fractional weights such as the defaults below unreachable by dragging.
        pose_weight = gr.Slider(minimum=0.0, value=1.5, step=0.1, label="Pose Weight")
        face_weight = gr.Slider(minimum=0.0, value=1.0, step=0.1, label="Face Weight")
        lip_weight = gr.Slider(minimum=0.0, value=1.1, step=0.1, label="Lip Weight")
        face_expand_ratio = gr.Slider(minimum=0.0, value=1.2, step=0.1, label="Face Expand Ratio")

    generate = gr.Button("Generate", elem_id="generate-button")

    # Input order must match run_inference's positional parameters.
    generate.click(
        fn=run_inference,
        inputs=[avatar_face, driving_audio, pose_weight, face_weight, lip_weight, face_expand_ratio],
        outputs=output_video,
    )

demo.launch()