import gradio as gr
#import gradio.helpers
import torch
import os
import base64
from glob import glob
from pathlib import Path
from typing import Optional
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
from PIL import Image
import uuid
import random
from huggingface_hub import login, hf_hub_download
#gradio.helpers.CACHED_FOLDER = '/data/cache'
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
HF_API_KEY = os.getenv('HF_API_KEY', '')
login(token=HF_API_KEY)
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
    torch_dtype=torch.float16,
    variant="fp16"
)
pipe.to("cuda")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
#pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
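# Note: torch.compile adds a noticeable warm-up cost on the first generation.
# If VRAM is tight, one possible alternative (a sketch, not used here) is to skip
# the full GPU placement above and rely on diffusers' CPU offloading instead:
#     pipe.enable_model_cpu_offload()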
max_64_bit_int = 2**63 - 1
def generate_video(
    secret_token: str,
    image: Image.Image,
    seed: int,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
    device: str = "cuda",
    output_folder: str = "outputs",
):
    if secret_token != SECRET_TOKEN:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    # Note (julian): input images are normally already 1024x576, but resize anyway to be safe.
    # It would also be interesting to experiment with vertical (e.g. 576x1024) and 1024x512 videos.
    image = resize_image(image)
    if image.mode == "RGBA":
        image = image.convert("RGB")

    generator = torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
    export_to_video(frames, video_path, fps=fps_id)

    # Read the content of the video file and encode it to base64
    with open(video_path, "rb") as video_file:
        video_base64 = base64.b64encode(video_file.read()).decode('utf-8')

    # Prepend the appropriate data URI header with MIME type
    video_data_uri = 'data:video/mp4;base64,' + video_base64

    # Clean up the temporary file, otherwise there is a risk of "ghosting":
    # if one of the steps above goes wrong, a later request could return a previously generated video.
    os.remove(video_path)

    return video_data_uri
def resize_image(image, output_size=(1024, 576)):
    # Calculate aspect ratios
    target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
    image_aspect = image.width / image.height  # Aspect ratio of the original image

    # Resize then crop if the original image is larger
    if image_aspect > target_aspect:
        # Resize the image to match the target height, maintaining the aspect ratio
        new_height = output_size[1]
        new_width = int(new_height * image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        # Calculate coordinates for a horizontally centered crop
        left = (new_width - output_size[0]) / 2
        top = 0
        right = (new_width + output_size[0]) / 2
        bottom = output_size[1]
    else:
        # Resize the image to match the target width, maintaining the aspect ratio
        new_width = output_size[0]
        new_height = int(new_width / image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        # Calculate coordinates for a vertically centered crop
        left = 0
        top = (new_height - output_size[1]) / 2
        right = output_size[0]
        bottom = (new_height + output_size[1]) / 2

    # Crop the image
    cropped_image = resized_image.crop((left, top, right, bottom))
    return cropped_image
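# Worked example (illustrative only, not executed at import time):
#
#     sample = Image.new("RGB", (1024, 1024))           # aspect 1.0 < 1024/576 ≈ 1.78
#     assert resize_image(sample).size == (1024, 576)   # else-branch: center crop, top=224, bottom=800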
with gr.Blocks() as demo:
    secret_token = gr.Text(
        label='Secret Token',
        max_lines=1,
        placeholder='Enter your secret token')

    gr.HTML("""
    <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
      <div style="text-align: center; color: black;">
        <p style="color: black;">This space is a REST API to programmatically generate MP4 videos.</p>
        <p style="color: black;">Interested in using it? Look no further than the <a href="https://huggingface.co/spaces/multimodalart/stable-video-diffusion" target="_blank">original space</a>!</p>
      </div>
    </div>""")

    image = gr.Image(label="Upload your image", type="pil")
    generate_btn = gr.Button("Generate")
    base64_out = gr.Textbox(label="Base64 Video")
    seed = gr.Slider(label="Seed", value=42, randomize=False, minimum=0, maximum=max_64_bit_int, step=1)
    motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
    fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)

    generate_btn.click(
        fn=generate_video,
        inputs=[secret_token, image, seed, motion_bucket_id, fps_id],
        outputs=base64_out,
        api_name="run")

demo.queue(max_size=20).launch()
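# Example client (a minimal sketch, not part of this app): calling the "run" API endpoint
# with gradio_client and decoding the returned base64 data URI back into an MP4 file.
# The space URL below is a placeholder, and the way the image argument is passed may
# differ between gradio_client versions (newer ones expect handle_file("input.png")).
#
#     from gradio_client import Client
#     import base64
#
#     client = Client("https://your-username-your-space.hf.space")  # hypothetical URL
#     data_uri = client.predict(
#         "default_secret",   # secret_token
#         "input.png",        # image
#         42,                 # seed
#         127,                # motion_bucket_id
#         6,                  # fps_id
#         api_name="/run",
#     )
#     video_bytes = base64.b64decode(data_uri.split("base64,", 1)[1])
#     with open("output.mp4", "wb") as f:
#         f.write(video_bytes)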