|
import os |
|
|
|
import gradio as gr |
|
from gradio_imageslider import ImageSlider |
|
import argparse |
|
from SUPIR.util import HWC3, upscale_image, fix_resize, convert_dtype |
|
import numpy as np |
|
import torch |
|
from SUPIR.util import create_SUPIR_model, load_QF_ckpt |
|
from PIL import Image |
|
from llava.llava_agent import LLavaAgent |
|
from CKPT_PTH import LLAVA_MODEL_PATH |
|
import einops |
|
import copy |
|
import time |
|
import spaces |
|
from huggingface_hub import hf_hub_download |
|
|
|
from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter |
|
from diffusers.utils import export_to_gif |
|
from diffusers.utils import export_to_video |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
import uuid |
|
|
|
# Fetch the checkpoint files into local directories at import time.
# Each entry is (repo_id, filename, local_dir); downloads run in listed order.
for _repo_id, _filename, _local_dir in (
    (
        "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
        "open_clip_pytorch_model.bin",
        "laion_CLIP-ViT-bigG-14-laion2B-39B-b160k",
    ),
    (
        "camenduru/SUPIR",
        "sd_xl_base_1.0_0.9vae.safetensors",
        "yushan777_SUPIR",
    ),
    (
        "camenduru/SUPIR",
        "SUPIR-v0F.ckpt",
        "yushan777_SUPIR",
    ),
    (
        "camenduru/SUPIR",
        "SUPIR-v0Q.ckpt",
        "yushan777_SUPIR",
    ),
    (
        "RunDiffusion/Juggernaut-XL-Lightning",
        "Juggernaut_RunDiffusionPhoto2_Lightning_4Steps.safetensors",
        "RunDiffusion_Juggernaut-XL-Lightning",
    ),
):
    hf_hub_download(repo_id=_repo_id, filename=_filename, local_dir=_local_dir)
|
|
|
# Command-line options for the app. Boolean switches are `store_true` flags,
# so each one is False unless it is given on the command line.
parser = argparse.ArgumentParser()
parser.add_argument("--opt", type=str, default="options/SUPIR_v0.yaml")
parser.add_argument("--ip", type=str, default="127.0.0.1")
# The default is a real int so that it agrees with type=int.
parser.add_argument("--port", type=int, default=6688)
parser.add_argument("--no_llava", action="store_true")
parser.add_argument("--use_image_slider", action="store_true")
parser.add_argument("--log_history", action="store_true")
parser.add_argument("--loading_half_params", action="store_true")
parser.add_argument("--use_tile_vae", action="store_true")
parser.add_argument("--encoder_tile_size", type=int, default=512)
parser.add_argument("--decoder_tile_size", type=int, default=64)
parser.add_argument("--load_8bit_llava", action="store_true")
args = parser.parse_args()

# Convenience aliases for the parsed values.
server_ip = args.ip
server_port = args.port
use_llava = not args.no_llava  # LLaVA is on unless --no_llava is passed
|
|
|
# Choose devices from the number of CUDA devices torch reports:
#   0 devices  -> both models on the CPU
#   1 device   -> both models share cuda:0
#   2+ devices -> SUPIR on cuda:0, LLaVA on cuda:1
SUPIR_device, LLaVA_device = {
    0: ('cpu', 'cpu'),
    1: ('cuda:0', 'cuda:0'),
}.get(torch.cuda.device_count(), ('cuda:0', 'cuda:1'))
|
|
|
|
|
# Build the SUPIR model with SUPIR_sign='Q' and fetch its default settings.
model, default_setting = create_SUPIR_model(
    args.opt,
    SUPIR_sign='Q',
    load_default_setting=True,
)

# Optional steps, applied in this order: half-precision cast, tiled VAE
# initialisation, then the move to SUPIR_device.
if args.loading_half_params:
    model = model.half()
if args.use_tile_vae:
    model.init_tile_vae(
        encoder_tile_size=args.encoder_tile_size,
        decoder_tile_size=args.decoder_tile_size,
    )
model = model.to(SUPIR_device)

# Store an independent deep copy of the denoise encoder as `denoise_encoder_s1`.
_first_stage = model.first_stage_model
_first_stage.denoise_encoder_s1 = copy.deepcopy(_first_stage.denoise_encoder)

# Record which variant is considered active and load both checkpoint sets.
model.current_model = 'v0-Q'
ckpt_Q, ckpt_F = load_QF_ckpt(args.opt)
|
|
|
|
|
# LLaVA agent, or None when LLaVA has been disabled via --no_llava.
llava_agent = (
    LLavaAgent(
        LLAVA_MODEL_PATH,
        device=LLaVA_device,
        load_8bit=args.load_8bit_llava,
        load_4bit=False,
    )
    if use_llava
    else None
)
|
|
|
|
|
|
|
# Maps each adapter label to the repository id passed to
# `pipe.load_lora_weights`. All repositories share one prefix; only the
# suffix differs (note "roll-*" labels map to "rolling-*" repositories).
adapter_options = {
    label: f"guoyww/animatediff-motion-lora-{repo_suffix}"
    for label, repo_suffix in (
        ("zoom-out", "zoom-out"),
        ("zoom-in", "zoom-in"),
        ("pan-left", "pan-left"),
        ("pan-right", "pan-right"),
        ("roll-clockwise", "rolling-clockwise"),
        ("roll-anticlockwise", "rolling-anticlockwise"),
        ("tilt-up", "tilt-up"),
        ("tilt-down", "tilt-down"),
    )
}
|
|
|
def load_cached_examples():
    """Return the built-in example rows as a new list on every call.

    Each row holds five values: two strings, a float, an int and a list of
    adapter labels (presumably in the parameter order of ``generate_video``;
    verify against the caller).
    """
    return [
        ["a cat playing with a ball of yarn", "blurry", 7.5, 12, ["zoom-in"]],
        ["a dog running in a field", "dark, indoors", 8.0, 8, ["pan-left", "tilt-up"]],
    ]
|
|
|
# AnimateDiff text-to-video pipeline, constructed once at import time.
device = "cuda"
adapter = MotionAdapter.from_pretrained(
    "guoyww/animatediff-motion-adapter-v1-5-2",
    torch_dtype=torch.float16,
)
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"

# Build the pipeline around the motion adapter, then move it to `device`.
pipe = AnimateDiffPipeline.from_pretrained(
    model_id,
    motion_adapter=adapter,
    torch_dtype=torch.float16,
)
pipe = pipe.to(device)

# Replace the pipeline's scheduler with a DDIM scheduler loaded from the same
# model repository, overriding a few of its configuration values.
pipe.scheduler = scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    clip_sample=False,
    timestep_spacing="linspace",
    beta_schedule="linear",
    steps_offset=1,
)
|
|
|
@spaces.GPU
def generate_video(prompt, negative_prompt, guidance_scale, num_inference_steps, adapter_choices):
    """Render a 16-frame clip for *prompt* and return the path of the MP4 file.

    Args:
        prompt: Text passed to the pipeline as ``prompt``.
        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
        guidance_scale: Passed to the pipeline as ``guidance_scale``.
        num_inference_steps: Number of inference steps; coerced to ``int``
            before it is handed to the pipeline.
        adapter_choices: Keys of ``adapter_options`` naming the motion LoRAs
            to apply. An empty list or ``None`` applies none.

    Returns:
        Path of the exported video, ``/tmp/<32 hex chars>.mp4``.

    Raises:
        KeyError: If an entry of ``adapter_choices`` is not a key of
            ``adapter_options``.
    """
    pipe.to(device)

    # The pipeline object is shared by every request. Start from a clean
    # slate so that (a) a LoRA name that an earlier request already
    # registered can be loaded again without a name clash, and (b) a request
    # with no adapters is not silently affected by a previous request's
    # adapters.
    pipe.unload_lora_weights()

    if adapter_choices:
        for adapter_name in adapter_choices:
            pipe.load_lora_weights(
                adapter_options[adapter_name],
                adapter_name=adapter_name,
            )
        # Activate every selected adapter at full strength.
        pipe.set_adapters(adapter_choices, adapter_weights=[1.0] * len(adapter_choices))
        print(adapter_choices)

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_frames=16,
        guidance_scale=guidance_scale,
        # UI sliders may deliver a float; the step count has to be integral.
        num_inference_steps=int(num_inference_steps),
    )

    # uuid4().hex is the UUID without dashes, giving a unique file name.
    name = uuid.uuid4().hex
    path = f"/tmp/{name}.mp4"
    export_to_video(output.frames[0], path, fps=10)
    return path
|
|
|
|
|
|
|
# Gradio front end for `generate_video`: two text boxes, two sliders and a
# checkbox group as inputs, one video as output.
iface = gr.Interface(
    theme=gr.themes.Soft(primary_hue="cyan", secondary_hue="teal"),
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative Prompt"),
        gr.Slider(minimum=0.5, maximum=10, value=7.5, label="Guidance Scale"),
        gr.Slider(minimum=4, maximum=24, step=4, value=4, label="Inference Steps"),
        # Pass the labels as a real list rather than a dict_keys view, which
        # is the documented type for `choices` and is JSON-serialisable.
        gr.CheckboxGroup(list(adapter_options), label="Adapter Choice", type="value"),
    ],
    outputs=gr.Video(label="Generated Video"),
    # Example rows follow the order of `inputs`; with cache_examples=True
    # Gradio computes their outputs ahead of time.
    examples=[
        ["Urban ambiance, man walking, neon lights, rain, wet floor, high quality", "bad quality", 7.5, 24, []],
        ["Nature, farms, mountains in background, drone shot, high quality", "bad quality", 8.0, 24, []],
    ],
    cache_examples=True,
)

# NOTE(review): the --ip / --port options are parsed into server_ip and
# server_port but are not forwarded to launch() - confirm this is intended.
iface.launch()