SakanaAI
/

Evo-Ukiyoe-v1

Text-to-Image

Diffusers

Japanese

stable-diffusion

Model card Files Files and versions Community

yuki-imajuku committed on Jul 11

Commit

f8c0350

•

1 Parent(s): 03a7b59

Rename evoukiyoe_v1.py to evo_ukiyoe_v1.py

Browse files

Files changed (1) hide show

evoukiyoe_v1.py → evo_ukiyoe_v1.py +27 -20

evoukiyoe_v1.py → evo_ukiyoe_v1.py RENAMED Viewed

@@ -6,22 +6,20 @@ from diffusers import (
     StableDiffusionXLPipeline,
     UNet2DConditionModel,
 )
-from diffusers.loaders import LoraLoaderMixin
 from huggingface_hub import hf_hub_download
 import safetensors
 import torch
 from tqdm import tqdm
 from transformers import AutoTokenizer, CLIPTextModelWithProjection
 # Base models (fine-tuned from SDXL-1.0)
 SDXL_REPO = "stabilityai/stable-diffusion-xl-base-1.0"
 DPO_REPO = "mhdang/dpo-sdxl-text2image-v1"
 JN_REPO = "RunDiffusion/Juggernaut-XL-v9"
 JSDXL_REPO = "stabilityai/japanese-stable-diffusion-xl"
-# Evo-Ukiyoe
-UKIYOE_REPO = "SakanaAI/Evo-Ukiyoe-v1"
 def load_state_dict(checkpoint_file: Union[str, os.PathLike], device: str = "cpu"):
@@ -108,7 +106,7 @@ def split_conv_attn(weights):
     return {"conv": conv_tensors, "attn": attn_tensors}
-def load_evoukiyoe(device="cuda") -> StableDiffusionXLPipeline:
     # Load base models
     sdxl_weights = split_conv_attn(load_from_pretrained(SDXL_REPO, device=device))
     dpo_weights = split_conv_attn(
@@ -147,26 +145,15 @@ def load_evoukiyoe(device="cuda") -> StableDiffusionXLPipeline:
     unet = UNet2DConditionModel.from_config(unet_config).to(device=device)
     unet.load_state_dict({**new_conv, **new_attn})
-    # Load LoRA weights
-    state_dict, network_alphas = LoraLoaderMixin.lora_state_dict(
-        pretrained_model_name_or_path_or_dict=UKIYOE_REPO
-    )
-    LoraLoaderMixin.load_lora_into_unet(state_dict, network_alphas, unet)
-    unet.fuse_lora(1.0)
     # Load other modules
     text_encoder = CLIPTextModelWithProjection.from_pretrained(
-        JSDXL_REPO,
-        subfolder="text_encoder",
-        torch_dtype=torch.float16,
-        variant="fp16",
     )
     tokenizer = AutoTokenizer.from_pretrained(
-        JSDXL_REPO,
-        subfolder="tokenizer",
-        use_fast=False,
     )
     pipe = StableDiffusionXLPipeline.from_pretrained(
         SDXL_REPO,
         unet=unet,
@@ -176,4 +163,24 @@ def load_evoukiyoe(device="cuda") -> StableDiffusionXLPipeline:
         variant="fp16",
     )
     pipe = pipe.to(device, dtype=torch.float16)
-    return pipe

     StableDiffusionXLPipeline,
     UNet2DConditionModel,
 )
 from huggingface_hub import hf_hub_download
 import safetensors
 import torch
 from tqdm import tqdm
 from transformers import AutoTokenizer, CLIPTextModelWithProjection
 # Base models (fine-tuned from SDXL-1.0)
 SDXL_REPO = "stabilityai/stable-diffusion-xl-base-1.0"
 DPO_REPO = "mhdang/dpo-sdxl-text2image-v1"
 JN_REPO = "RunDiffusion/Juggernaut-XL-v9"
 JSDXL_REPO = "stabilityai/japanese-stable-diffusion-xl"
+# LoRA weights
+LORA_REPO = "SakanaAI/Evo-Ukiyoe-v1"
 def load_state_dict(checkpoint_file: Union[str, os.PathLike], device: str = "cpu"):
     return {"conv": conv_tensors, "attn": attn_tensors}
+def load_evo_ukiyoe(device="cuda") -> StableDiffusionXLPipeline:
     # Load base models
     sdxl_weights = split_conv_attn(load_from_pretrained(SDXL_REPO, device=device))
     dpo_weights = split_conv_attn(
     unet = UNet2DConditionModel.from_config(unet_config).to(device=device)
     unet.load_state_dict({**new_conv, **new_attn})
     # Load other modules
     text_encoder = CLIPTextModelWithProjection.from_pretrained(
+        JSDXL_REPO, subfolder="text_encoder", torch_dtype=torch.float16, variant="fp16",
     )
     tokenizer = AutoTokenizer.from_pretrained(
+        JSDXL_REPO, subfolder="tokenizer", use_fast=False,
     )
+    # Load pipeline
     pipe = StableDiffusionXLPipeline.from_pretrained(
         SDXL_REPO,
         unet=unet,
         variant="fp16",
     )
     pipe = pipe.to(device, dtype=torch.float16)
+    # Load LoRA module
+    pipe.load_lora_weights(LORA_REPO)
+    pipe.fuse_lora(lora_scale=1.0)
+    return pipe
+if __name__ == "__main__":
+    pipe: StableDiffusionXLPipeline = load_evo_ukiyoe()
+    images = pipe(
+        prompt="鶴が庭に立っている。雪が降っている。最高品質の輻の浮世絵。",
+        negative_prompt="",
+        width=1024,
+        height=1024,
+        guidance_scale=8.0,
+        num_inference_steps=50,
+        generator=torch.Generator().manual_seed(0),
+        num_images_per_prompt=1,
+        output_type="pil",
+    ).images
+    images[0].save("out.png")