Image2Paragraph / models /controlnet_model.py
Awiny's picture
add blip2 to local
9b4b3ea
raw
history blame
2.14 kB
import cv2
import torch
import numpy as np
from PIL import Image
from diffusers import (
StableDiffusionControlNetPipeline,
ControlNetModel,
UniPCMultistepScheduler,
)
class TextToImage:
    """Text-to-image generator conditioned on the Canny edges of a reference image.

    Wraps a ``StableDiffusionControlNetPipeline`` built from the
    ``runwayml/stable-diffusion-v1-5`` weights and the
    ``fusing/stable-diffusion-v1-5-controlnet-canny`` ControlNet.

    Attributes set by this class:
        device: The device passed to the constructor.
        data_type: ``torch.float32`` on CPU, ``torch.float16`` otherwise.
        model: The configured pipeline returned by ``initialize_model``.
    """

    def __init__(self, device):
        """Remember the target device and build the pipeline.

        Args:
            device: Device to run on, e.g. ``'cpu'`` or ``'cuda'``. Anything
                whose ``str()`` form is a torch device string (including a
                ``torch.device``) is accepted.
        """
        self.device = device
        self.model = self.initialize_model()

    @staticmethod
    def _is_cpu_device(device):
        """Return True when *device* names the CPU.

        Comparing the string form (rather than ``device == 'cpu'``) also
        recognises ``torch.device('cpu')`` objects, which do not compare equal
        to the plain string.
        """
        return str(device).split(":", 1)[0] == "cpu"

    def initialize_model(self):
        """Load the ControlNet and Stable Diffusion weights and return the pipeline.

        Sets ``self.data_type`` as a side effect.

        Returns:
            The ``StableDiffusionControlNetPipeline`` with a
            ``UniPCMultistepScheduler`` installed, placed on the CPU or set up
            for model CPU offloading.
        """
        on_cpu = self._is_cpu_device(self.device)
        # Full precision on CPU, half precision everywhere else.
        self.data_type = torch.float32 if on_cpu else torch.float16

        # NOTE: ``map_location`` is not a ``from_pretrained`` argument, so it
        # is no longer passed; placement is handled below.
        controlnet = ControlNetModel.from_pretrained(
            "fusing/stable-diffusion-v1-5-controlnet-canny",
            torch_dtype=self.data_type,
        )
        pipeline = StableDiffusionControlNetPipeline.from_pretrained(
            # A local checkpoint directory such as
            # "pretrained_models/stable-diffusion-v1-5" can be used instead.
            "runwayml/stable-diffusion-v1-5",
            controlnet=controlnet,
            safety_checker=None,
            torch_dtype=self.data_type,
        )
        pipeline.scheduler = UniPCMultistepScheduler.from_config(
            pipeline.scheduler.config
        )

        if on_cpu:
            pipeline.to(self.device)
        else:
            # Do not move the pipeline to the accelerator first: offloading
            # transfers each sub-model on demand, and a prior full move would
            # only raise peak device memory.
            # NOTE(review): offloading uses the library's default accelerator,
            # not necessarily ``self.device`` (e.g. 'cuda:1') -- confirm.
            pipeline.enable_model_cpu_offload()
        return pipeline

    @staticmethod
    def preprocess_image(image, low_threshold=100, high_threshold=200):
        """Convert *image* into a three-channel Canny edge image.

        Args:
            image: Input image; it is converted with ``np.array`` and handed
                to ``cv2.Canny``.
            low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
            high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

        Returns:
            A PIL image whose three channels all hold the same edge map.
        """
        edges = cv2.Canny(np.array(image), low_threshold, high_threshold)
        # Replicate the single-channel edge map along a new last axis.
        stacked = np.stack([edges] * 3, axis=2)
        return Image.fromarray(stacked)

    def text_to_image(self, text, image, num_inference_steps=20):
        """Generate an image from *text*, guided by the edges of *image*.

        Args:
            text: Prompt passed to the pipeline.
            image: Reference image; its Canny edge map is the control input.
            num_inference_steps: Number of denoising steps to run.

        Returns:
            The first image produced by the pipeline.
        """
        banner = '\033[1;35m' + '*' * 100 + '\033[0m'
        print(banner)
        print('\nStep5, Text to Image:')
        control_image = self.preprocess_image(image)
        result = self.model(
            text, control_image, num_inference_steps=num_inference_steps
        )
        generated_image = result.images[0]
        print("Generated image has been saved.")
        print(banner)
        return generated_image

    def text_to_image_debug(self, text, image):
        """Debug stand-in for ``text_to_image``: return *image* unchanged."""
        print("text_to_image_debug")
        return image