
yujiepan/stable-diffusion-3-tiny-random

This pipeline is intended for debugging. It is adapted from stabilityai/stable-diffusion-3-medium-diffusers, with a much smaller architecture and randomly initialized parameters.

Usage

import torch
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained("yujiepan/stable-diffusion-3-tiny-random", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

image = pipe(
    "A cat holding a sign that says hello world",
    negative_prompt="",
    num_inference_steps=2,
    guidance_scale=7.0,
).images[0]
image
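
Because the weights are tiny and randomly initialized, the outputs are noise rather than meaningful images, but the pipeline is fast enough to debug on CPU. A minimal sketch, assuming a CPU-only environment (float32 is used because float16 kernels are generally unavailable on CPU; the output filename is only illustrative):

import torch
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "yujiepan/stable-diffusion-3-tiny-random", torch_dtype=torch.float32
)
image = pipe(
    "A cat holding a sign that says hello world",
    negative_prompt="",
    num_inference_steps=2,
    guidance_scale=7.0,
).images[0]
image.save("tiny-sd3-debug.png")  # hypothetical output path; the image itself is noise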

Code

import importlib

import torch
import transformers

import diffusers
import rich


def get_original_model_configs(pipeline_cls: type[diffusers.DiffusionPipeline], pipeline_id: str):
    # Read the pipeline's model_index.json and collect a config for every model component.
    pipeline_config: dict[str, list[str]] = pipeline_cls.load_config(pipeline_id)
    model_configs = {}

    for subfolder, import_strings in pipeline_config.items():
        if subfolder.startswith("_"):
            continue
        module = importlib.import_module(".".join(import_strings[:-1]))
        cls = getattr(module, import_strings[-1])
        if issubclass(cls, transformers.PreTrainedModel):
            config_class: transformers.PretrainedConfig = cls.config_class
            config = config_class.from_pretrained(pipeline_id, subfolder=subfolder)
            model_configs[subfolder] = config
        elif issubclass(cls, diffusers.ModelMixin) and issubclass(cls, diffusers.ConfigMixin):
            config = cls.load_config(pipeline_id, subfolder=subfolder)
            model_configs[subfolder] = config

    return model_configs
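
# Note: pipeline_cls.load_config() reads model_index.json, which maps each component
# subfolder to an import path, roughly e.g. "transformer" -> ["diffusers", "SD3Transformer2DModel"]
# and "text_encoder" -> ["transformers", "CLIPTextModelWithProjection"]. Components from
# `transformers` come back as config objects, components from `diffusers` as plain config dicts.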


def load_pipeline(pipeline_cls: type[diffusers.DiffusionPipeline], pipeline_id: str, model_configs: dict[str, dict]):
    # Rebuild each component: models are instantiated from the (shrunk) configs with random
    # weights, while tokenizers and the scheduler are loaded unchanged from the original repo.
    pipeline_config: dict[str, list[str]] = pipeline_cls.load_config(pipeline_id)
    components = {}
    for subfolder, import_strings in pipeline_config.items():
        if subfolder.startswith("_"):
            continue
        module = importlib.import_module(".".join(import_strings[:-1]))
        cls = getattr(module, import_strings[-1])
        print(f"Loading:", ".".join(import_strings))
        if issubclass(cls, transformers.PreTrainedModel):
            config = model_configs[subfolder]
            component = cls(config)
        elif issubclass(cls, transformers.PreTrainedTokenizerBase):
            component = cls.from_pretrained(pipeline_id, subfolder=subfolder)
        elif issubclass(cls, diffusers.ModelMixin) and issubclass(cls, diffusers.ConfigMixin):
            config = model_configs[subfolder]
            component = cls.from_config(config)
        elif issubclass(cls, diffusers.SchedulerMixin) and issubclass(cls, diffusers.ConfigMixin):
            component = cls.from_pretrained(pipeline_id, subfolder=subfolder)
        else:
            raise (f"unknown {subfolder}: {import_strings}")
        components[subfolder] = component
    pipeline = pipeline_cls(**components)
    return pipeline


def get_pipeline():
    torch.manual_seed(42)
    pipeline_id = "stabilityai/stable-diffusion-3-medium-diffusers"
    pipeline_cls = diffusers.StableDiffusion3Pipeline
    model_configs = get_original_model_configs(pipeline_cls, pipeline_id)
    rich.print(model_configs)

    HIDDEN_SIZE = 8  # toy hidden width shared across the shrunken components

    model_configs["text_encoder"].hidden_size = HIDDEN_SIZE
    model_configs["text_encoder"].intermediate_size = HIDDEN_SIZE * 2
    model_configs["text_encoder"].num_attention_heads = 2
    model_configs["text_encoder"].num_hidden_layers = 2
    model_configs["text_encoder"].projection_dim = HIDDEN_SIZE

    model_configs["text_encoder_2"].hidden_size = HIDDEN_SIZE
    model_configs["text_encoder_2"].intermediate_size = HIDDEN_SIZE * 2
    model_configs["text_encoder_2"].num_attention_heads = 2
    model_configs["text_encoder_2"].num_hidden_layers = 2
    model_configs["text_encoder_2"].projection_dim = HIDDEN_SIZE

    model_configs["text_encoder_3"].d_model = HIDDEN_SIZE
    model_configs["text_encoder_3"].d_ff = HIDDEN_SIZE * 2
    model_configs["text_encoder_3"].d_kv = HIDDEN_SIZE // 2
    model_configs["text_encoder_3"].num_heads = 2
    model_configs["text_encoder_3"].num_layers = 2

    model_configs["transformer"]["num_layers"] = 2
    model_configs["transformer"]["num_attention_heads"] = 2
    model_configs["transformer"]["attention_head_dim"] = HIDDEN_SIZE // 2
    model_configs["transformer"]["pooled_projection_dim"] = HIDDEN_SIZE * 2
    model_configs["transformer"]["joint_attention_dim"] = HIDDEN_SIZE
    model_configs["transformer"]["caption_projection_dim"] = HIDDEN_SIZE

    model_configs["vae"]["layers_per_block"] = 1
    model_configs["vae"]["block_out_channels"] = [HIDDEN_SIZE] * 4
    model_configs["vae"]["norm_num_groups"] = 2
    model_configs["vae"]["latent_channels"] = 16

    pipeline = load_pipeline(pipeline_cls, pipeline_id, model_configs)
    return pipeline


pipeline = get_pipeline()
image = pipeline(
    "hello world",
    negative_prompt="runtime error",
    num_inference_steps=2,
    guidance_scale=7.0,
).images[0]


pipeline = pipeline.to(torch.float16)
pipeline.save_pretrained("/tmp/stable-diffusion-3-tiny-random")
pipeline.push_to_hub("yujiepan/stable-diffusion-3-tiny-random")
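
# Optional sanity check (illustrative; assumes a CUDA device and the local save path above):
reloaded = diffusers.StableDiffusion3Pipeline.from_pretrained(
    "/tmp/stable-diffusion-3-tiny-random", torch_dtype=torch.float16
).to("cuda")
reloaded("hello world", num_inference_steps=2, guidance_scale=7.0)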