Diffusers Wrapper error
I've been trying to create a wrapper for the Diffusers pipeline in ComfyUI and am getting a persistent error when trying to preview the generation.
Loading Lumina model from: K:\AI-Art\ComfyUI_windows_portable\ComfyUI\custom_nodes\Lumina-Next-SFT-DiffusersWrapper\Lumina-Next-SFT-diffusers
Loading pipeline components...: 100%|████████████████████████████████████████████████████| 5/5 [00:01<00:00, 3.01it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:07<00:00, 1.39it/s]
Final image tensor shape: torch.Size([1, 3, 1024, 1024])
!!! Exception during processing!!! Cannot handle this data type: (1, 1, 1024), |u1
Traceback (most recent call last):
File "K:\AI-Art\ComfyUI_windows_portable\python_embeded\Lib\site-packages\PIL\Image.py", line 3277, in fromarray
mode, rawmode = _fromarray_typemap[typekey]
~~~~~~~~~~~~~~~~~~^^^^^^^^^
KeyError: ((1, 1, 1024), '|u1')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "K:\AI-Art\ComfyUI_windows_portable\ComfyUI\execution.py", line 151, in recursive_execute
output_data, output_ui = get_output_data(obj, input_data_all)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "K:\AI-Art\ComfyUI_windows_portable\ComfyUI\execution.py", line 81, in get_output_data
return_values = map_node_over_list(obj, input_data_all, obj.FUNCTION, allow_interrupt=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "K:\AI-Art\ComfyUI_windows_portable\ComfyUI\custom_nodes\ComfyUI-0246\utils.py", line 381, in new_func
res_value = old_func(*final_args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "K:\AI-Art\ComfyUI_windows_portable\ComfyUI\execution.py", line 74, in map_node_over_list
results.append(getattr(obj, func)(**slice_dict(input_data_all, i)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "K:\AI-Art\ComfyUI_windows_portable\ComfyUI\nodes.py", line 1436, in save_images
img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "K:\AI-Art\ComfyUI_windows_portable\python_embeded\Lib\site-packages\PIL\Image.py", line 3281, in fromarray
raise TypeError(msg) from e
TypeError: Cannot handle this data type: (1, 1, 1024), |u1
Prompt executed in 15.21 seconds
I've tried everything, and I was wondering if the generated output is in a particular format that I'm not handling correctly.
Could you please give more details about your environment?
Hi @Excido,
what are the details of Lumina-Next-SFT-DiffusersWrapper in custom_nodes? Could you share your implementation of Lumina-Next-SFT-DiffusersWrapper?
I've uploaded my current implementation to this repo, along with environment_details.txt:
https://github.com/Excidos/ComfyUI-Lumina-Next-SFT-DiffusersWrapper.git
Could you give the full errors? It seems like there is incorrect post-processing in your code.
So I worked on it a bit and it generates, but this is the image and decoded latent I get (logs and current implementation below). It's doing something, I think, and I feel it may be a post-processing issue.
[rgthree] Using rgthree's optimized recursive execution.
Generation device: cuda:0
Starting generation with seed: 858
100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:22<00:00, 1.33it/s]
Raw output shape: torch.Size([1, 3, 1024, 1024])
Raw output min: 0.0, max: 1.0
Permuted images shape: torch.Size([1, 1024, 1024, 3])
Images min: 0.0, max: 1.0
Final images shape: torch.Size([1, 1024, 1024, 3])
Final images min: 128, max: 255
Latents shape: torch.Size([1, 4, 128, 128])
Latents min: -2.109375, max: 3.109375
Using pytorch attention in VAE
Using pytorch attention in VAE
Requested to load AutoencoderKL
Loading 1 new model
Prompt executed in 23.47 seconds
```python
import torch
from diffusers import LuminaText2ImgPipeline, FlowMatchEulerDiscreteScheduler
import comfy.model_management as mm
import os
import numpy as np
import traceback
import math


class LuminaDiffusersNode:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model_path": ("STRING", {"default": "Lumina-Next-SFT-diffusers"}),
                "prompt": ("STRING", {"multiline": True}),
                "negative_prompt": ("STRING", {"multiline": True}),
                "num_inference_steps": ("INT", {"default": 30, "min": 1, "max": 200}),
                "guidance_scale": ("FLOAT", {"default": 4.0, "min": 0.1, "max": 20.0}),
                "width": ("INT", {"default": 1024, "min": 512, "max": 2048, "step": 64}),
                "height": ("INT", {"default": 1024, "min": 512, "max": 2048, "step": 64}),
                "seed": ("INT", {"default": -1}),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 4}),
                "scaling_watershed": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0}),
                "time_aware_scaling": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0}),
                "context_drop_ratio": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 0.5}),
            }
        }

    RETURN_TYPES = ("IMAGE", "LATENT")
    FUNCTION = "generate"
    CATEGORY = "LuminaWrapper"

    def __init__(self):
        self.pipe = None

    def load_model(self, model_path):
        try:
            device = mm.get_torch_device()
            dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float32
            print(f"Attempting to load Lumina model from: {model_path}")
            print(f"Device: {device}, Dtype: {dtype}")
            full_path = os.path.join(os.path.dirname(__file__), model_path)
            if not os.path.exists(full_path):
                raise ValueError(f"Model path does not exist: {full_path}")
            print(f"Loading Lumina model from: {full_path}")
            self.pipe = LuminaText2ImgPipeline.from_pretrained(full_path, torch_dtype=dtype)
            self.pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.pipe.scheduler.config)
            self.pipe.to(device)
            print("Pipeline successfully loaded and moved to device.")
        except Exception as e:
            print(f"Error in load_model: {str(e)}")
            traceback.print_exc()

    def apply_time_aware_scaling(self, transformer, scale_factor):
        if hasattr(transformer, 'text_encoder'):
            transformer.text_encoder.config.time_aware_scaling = scale_factor
        if hasattr(transformer, 'unet'):
            transformer.unet.config.time_aware_scaling = scale_factor

    def apply_context_drop(self, transformer, drop_ratio):
        if hasattr(transformer, 'text_encoder'):
            transformer.text_encoder.config.context_drop_ratio = drop_ratio
        if hasattr(transformer, 'unet'):
            transformer.unet.config.context_drop_ratio = drop_ratio

    def generate(self, model_path, prompt, negative_prompt, num_inference_steps, guidance_scale, width, height, seed, batch_size, scaling_watershed, time_aware_scaling, context_drop_ratio):
        try:
            if self.pipe is None:
                print("Pipeline not loaded. Attempting to load model.")
                self.load_model(model_path)
            if self.pipe is None:
                raise ValueError("Failed to load the pipeline.")

            device = mm.get_torch_device()
            print(f"Generation device: {device}")

            if seed == -1:
                seed = int.from_bytes(os.urandom(4), "big")
            generator = torch.Generator(device=device).manual_seed(seed)

            # Prepare Lumina-specific kwargs
            scale_factor = math.sqrt(width * height / 1024**2)

            # Modify the pipe's transformer to include Lumina-specific features
            if hasattr(self.pipe, 'transformer'):
                self.pipe.transformer.scale_factor = scale_factor
                self.pipe.transformer.scale_watershed = scaling_watershed
                self.apply_time_aware_scaling(self.pipe.transformer, time_aware_scaling)
                self.apply_context_drop(self.pipe.transformer, context_drop_ratio)

            print(f"Starting generation with seed: {seed}")

            output = self.pipe(
                prompt=[prompt] * batch_size,
                negative_prompt=[negative_prompt] * batch_size,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                width=width,
                height=height,
                generator=generator,
                num_images_per_prompt=1,
                output_type="pt",
            )

            print(f"Raw output shape: {output.images.shape}")
            print(f"Raw output min: {output.images.min()}, max: {output.images.max()}")

            images = output.images
            images = images.permute(0, 2, 3, 1).cpu()
            print(f"Permuted images shape: {images.shape}")
            print(f"Images min: {images.min()}, max: {images.max()}")

            # Apply normalization
            images = (images + 1) / 2  # Assuming the output is in the range [-1, 1]
            images = (images * 255).round().clamp(0, 255).to(torch.uint8)

            print(f"Final images shape: {images.shape}")
            print(f"Final images min: {images.min()}, max: {images.max()}")

            # Generate latents
            with torch.no_grad():
                latents = self.pipe.vae.encode(output.images.to(self.pipe.vae.dtype)).latent_dist.sample()
                latents = latents * self.pipe.vae.config.scaling_factor
            print(f"Latents shape: {latents.shape}")
            print(f"Latents min: {latents.min()}, max: {latents.max()}")

            latents_for_comfy = {"samples": latents.cpu()}

            return (images, latents_for_comfy)
        except Exception as e:
            print(f"Error in generate: {str(e)}")
            traceback.print_exc()
            return (torch.zeros((batch_size, height, width, 3), dtype=torch.uint8),
                    {"samples": torch.zeros((batch_size, 4, height // 8, width // 8), dtype=torch.float32)})


NODE_CLASS_MAPPINGS = {
    "LuminaDiffusersNode": LuminaDiffusersNode
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "LuminaDiffusersNode": "Lumina-Next-SFT Diffusers"
}
```