docs: update input requirements

#5
Files changed (1) hide show
  1. app.py +0 -17
app.py CHANGED
@@ -91,23 +91,6 @@ with gr.Blocks(css=css) as demo:
91
  ''', elem_id="warning-duplicate")
92
  gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
93
  gr.Markdown("Generate talking head avatars driven from audio. **5 seconds of audio takes >10 minutes to generate on an L4** - duplicate the space for private use or try for free on Google Colab")
94
- gr.Markdown("""
95
- Hallo has a few simple requirements for input data:
96
-
97
- For the source image:
98
-
99
- 1. It should be cropped into squares.
100
- 2. The face should be the main focus, making up 50%-70% of the image.
101
- 3. The face should be facing forward, with a rotation angle of less than 30° (no side profiles).
102
-
103
- For the driving audio:
104
-
105
- 1. It must be in WAV format.
106
- 2. It must be in English since our training datasets are only in this language.
107
- 3. Ensure the vocals are clear; background music is acceptable.
108
-
109
- We have provided some [samples](https://huggingface.co/datasets/fudan-generative-ai/hallo_inference_samples) for your reference.
110
- """)
111
  with gr.Row():
112
  with gr.Column():
113
  avatar_face = gr.Image(type="filepath", label="Face")
 
91
  ''', elem_id="warning-duplicate")
92
  gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
93
  gr.Markdown("Generate talking head avatars driven from audio. **5 seconds of audio takes >10 minutes to generate on an L4** - duplicate the space for private use or try for free on Google Colab")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  with gr.Row():
95
  with gr.Column():
96
  avatar_face = gr.Image(type="filepath", label="Face")