import argparse
import os
import uuid

import gradio as gr
from huggingface_hub import snapshot_download

# Run from the app's own directory so the relative config, script, and model
# paths below resolve regardless of where the process was started.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
from scripts.inference import inference_process

# The public shared Space refuses to run inference; users must duplicate it.
# os.environ.get avoids a KeyError when SPACE_ID is unset (e.g. running locally).
is_shared_ui = "fudan-generative-ai/hallo" in os.environ.get("SPACE_ID", "")

# Fetch the pretrained Hallo weights once, on duplicated instances only.
if not is_shared_ui:
    hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")

def run_inference(source_image, driving_audio, pose_weight, face_weight, lip_weight, face_expand_ratio, progress=gr.Progress(track_tqdm=True)):
    """Run Hallo inference on one image/audio pair and return the output video path."""
    if is_shared_ui:
        raise gr.Error("This Space only works in duplicated instances")

    # Unique suffix so concurrent requests don't overwrite each other's output.
    unique_id = uuid.uuid4()

    # inference_process expects an argparse-style namespace, so build one directly.
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=f'output-{unique_id}.mp4',
        pose_weight=pose_weight,
        face_weight=face_weight,
        lip_weight=lip_weight,
        face_expand_ratio=face_expand_ratio,
        checkpoint=None
    )

    inference_process(args)
    return f'output-{unique_id}.mp4'
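
# For reference, the same inference can be run via the upstream repo's CLI
# (assuming scripts/inference.py exposes flags matching the Namespace above):
#   python scripts/inference.py --source_image face.jpg --driving_audio voice.wav \
#       --pose_weight 1.5 --face_weight 1.0 --lip_weight 1.1 --face_expand_ratio 1.2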

with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8') as demo:
    gr.Markdown(
        """
        # Talking Head Generation
        Upload a face image and driving audio, and adjust the weights to generate a talking head video.
        """
    )
    
    with gr.Row():
        with gr.Column():
            avatar_face = gr.Image(type="filepath", label="Face", elem_id="face-input")
            driving_audio = gr.Audio(type="filepath", label="Driving Audio", elem_id="audio-input")
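            # Both inputs use type="filepath", so run_inference receives paths
            # to Gradio's saved temp files rather than in-memory arrays.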
        with gr.Column():
            output_video = gr.Video(label="Your Talking Head", elem_id="output-video")
            with gr.Accordion("Advanced Settings", open=False):
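                # These four weights pass straight through to inference_process.
                # Note: gr.Slider defaults to maximum=100 when no maximum is given,
                # so the ~1.0-1.5 defaults sit at the far left of each slider.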
                pose_weight = gr.Slider(minimum=0.0, value=1.5, label="Pose Weight")
                face_weight = gr.Slider(minimum=0.0, value=1.0, label="Face Weight")
                lip_weight = gr.Slider(minimum=0.0, value=1.1, label="Lip Weight")
                face_expand_ratio = gr.Slider(minimum=0.0, value=1.2, label="Face Expand Ratio")
            
            generate = gr.Button("Generate", elem_id="generate-button")

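    # Wire the button to inference: input component values map positionally onto
    # run_inference's parameters, and the returned path populates the Video output.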
    generate.click(
        fn=run_inference,
        inputs=[avatar_face, driving_audio, pose_weight, face_weight, lip_weight, face_expand_ratio],
        outputs=output_video
    )
    
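# On Gradio 3.x, gr.Progress and long-running jobs need the request queue enabled;
# if this app targets that version, launch with demo.queue().launch() instead.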
demo.launch()