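# Hugging Face Space app: downloads SUPIR / SDXL checkpoints and serves an
# AnimateDiff text-to-video demo through a Gradio interface.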
import os
import gradio as gr
from gradio_imageslider import ImageSlider
import argparse
from SUPIR.util import HWC3, upscale_image, fix_resize, convert_dtype
import numpy as np
import torch
from SUPIR.util import create_SUPIR_model, load_QF_ckpt
from PIL import Image
from llava.llava_agent import LLavaAgent
from CKPT_PTH import LLAVA_MODEL_PATH
import einops
import copy
import time
import spaces
from huggingface_hub import hf_hub_download
from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter
from diffusers.utils import export_to_gif
from diffusers.utils import export_to_video
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import uuid
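# Fetch the model checkpoints referenced by the SUPIR config: the OpenCLIP
# ViT-bigG weights, the SDXL base (0.9 VAE) checkpoint, the SUPIR-v0F/v0Q
# checkpoints, and the Juggernaut-XL-Lightning checkpoint.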
hf_hub_download(repo_id="laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", filename="open_clip_pytorch_model.bin", local_dir="laion_CLIP-ViT-bigG-14-laion2B-39B-b160k")
hf_hub_download(repo_id="camenduru/SUPIR", filename="sd_xl_base_1.0_0.9vae.safetensors", local_dir="yushan777_SUPIR")
hf_hub_download(repo_id="camenduru/SUPIR", filename="SUPIR-v0F.ckpt", local_dir="yushan777_SUPIR")
hf_hub_download(repo_id="camenduru/SUPIR", filename="SUPIR-v0Q.ckpt", local_dir="yushan777_SUPIR")
hf_hub_download(repo_id="RunDiffusion/Juggernaut-XL-Lightning", filename="Juggernaut_RunDiffusionPhoto2_Lightning_4Steps.safetensors", local_dir="RunDiffusion_Juggernaut-XL-Lightning")
parser = argparse.ArgumentParser()
parser.add_argument("--opt", type=str, default='options/SUPIR_v0.yaml')
parser.add_argument("--ip", type=str, default='127.0.0.1')
parser.add_argument("--port", type=int, default='6688')
parser.add_argument("--no_llava", action='store_true', default=False)
parser.add_argument("--use_image_slider", action='store_true', default=False)
parser.add_argument("--log_history", action='store_true', default=False)
parser.add_argument("--loading_half_params", action='store_true', default=False)
parser.add_argument("--use_tile_vae", action='store_true', default=False)
parser.add_argument("--encoder_tile_size", type=int, default=512)
parser.add_argument("--decoder_tile_size", type=int, default=64)
parser.add_argument("--load_8bit_llava", action='store_true', default=False)
args = parser.parse_args()
server_ip = args.ip
server_port = args.port
use_llava = not args.no_llava
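# Place SUPIR and LLaVA on separate GPUs when two are available; otherwise
# share a single GPU, or fall back to CPU.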
if torch.cuda.device_count() > 0:
    if torch.cuda.device_count() >= 2:
        SUPIR_device = 'cuda:0'
        LLaVA_device = 'cuda:1'
    elif torch.cuda.device_count() == 1:
        SUPIR_device = 'cuda:0'
        LLaVA_device = 'cuda:0'
else:
    SUPIR_device = 'cpu'
    LLaVA_device = 'cpu'
# load SUPIR
model, default_setting = create_SUPIR_model(args.opt, SUPIR_sign='Q', load_default_setting=True)
if args.loading_half_params:
    model = model.half()
if args.use_tile_vae:
    model.init_tile_vae(encoder_tile_size=args.encoder_tile_size, decoder_tile_size=args.decoder_tile_size)
model = model.to(SUPIR_device)
model.first_stage_model.denoise_encoder_s1 = copy.deepcopy(model.first_stage_model.denoise_encoder)
model.current_model = 'v0-Q'
#ckpt_Q, ckpt_F = load_QF_ckpt(args.opt)
# load LLaVA
#if use_llava:
#llava_agent = LLavaAgent(LLAVA_MODEL_PATH, device=LLaVA_device, load_8bit=args.load_8bit_llava, load_4bit=False)
#else:
#llava_agent = None
# Available adapters (replace with your actual adapter names)
adapter_options = {
    "zoom-out": "guoyww/animatediff-motion-lora-zoom-out",
    "zoom-in": "guoyww/animatediff-motion-lora-zoom-in",
    "pan-left": "guoyww/animatediff-motion-lora-pan-left",
    "pan-right": "guoyww/animatediff-motion-lora-pan-right",
    "roll-clockwise": "guoyww/animatediff-motion-lora-rolling-clockwise",
    "roll-anticlockwise": "guoyww/animatediff-motion-lora-rolling-anticlockwise",
    "tilt-up": "guoyww/animatediff-motion-lora-tilt-up",
    "tilt-down": "guoyww/animatediff-motion-lora-tilt-down",
}
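# Example prompt/parameter combinations; the Interface below defines its own
# examples list.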
def load_cached_examples():
    examples = [
        ["a cat playing with a ball of yarn", "blurry", 7.5, 12, ["zoom-in"]],
        ["a dog running in a field", "dark, indoors", 8.0, 8, ["pan-left", "tilt-up"]],
    ]
    return examples
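# Build the AnimateDiff pipeline: Realistic Vision v5.1 base model plus the
# v1-5-2 motion adapter, with a DDIM scheduler using linear betas and
# linspace timestep spacing.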
device = "cuda"
adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16)
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
pipe = AnimateDiffPipeline.from_pretrained(model_id, motion_adapter=adapter, torch_dtype=torch.float16).to(device)
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    clip_sample=False,
    timestep_spacing="linspace",
    beta_schedule="linear",
    steps_offset=1,
)
pipe.scheduler = scheduler
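# Generate a 16-frame clip on GPU: load any selected motion LoRAs, run the
# pipeline, and export the frames to an MP4 under /tmp.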
@spaces.GPU
def generate_video(prompt, negative_prompt, guidance_scale, num_inference_steps, adapter_choices):
    pipe.to(device)

    # Set adapters based on user selection
    if adapter_choices:
        for i in range(len(adapter_choices)):
            adapter_name = adapter_choices[i]
            pipe.load_lora_weights(
                adapter_options[adapter_name], adapter_name=adapter_name,
            )
        pipe.set_adapters(adapter_choices, adapter_weights=[1.0] * len(adapter_choices))
        print(adapter_choices)

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_frames=16,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
    )
    name = str(uuid.uuid4()).replace("-", "")
    path = f"/tmp/{name}.mp4"
    export_to_video(output.frames[0], path, fps=10)
    return path
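# Gradio UI: prompt / negative-prompt text boxes, guidance and step sliders,
# motion-adapter checkboxes, and a video output with two cached examples.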
iface = gr.Interface(
    theme=gr.themes.Soft(primary_hue="cyan", secondary_hue="teal"),
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative Prompt"),
        gr.Slider(minimum=0.5, maximum=10, value=7.5, label="Guidance Scale"),
        gr.Slider(minimum=4, maximum=24, step=4, value=4, label="Inference Steps"),
        gr.CheckboxGroup(list(adapter_options.keys()), label="Adapter Choice", type="value"),
    ],
    outputs=gr.Video(label="Generated Video"),
    examples=[
        ["Urban ambiance, man walking, neon lights, rain, wet floor, high quality", "bad quality", 7.5, 24, []],
        ["Nature, farms, mountains in background, drone shot, high quality", "bad quality", 8.0, 24, []],
    ],
    cache_examples=True,
)
iface.launch()