"""Stable Diffusion XL inference handler that returns base64-encoded PNGs."""

import base64
import os
from io import BytesIO

import torch
from diffusers import DDIMScheduler, StableDiffusionXLPipeline


class InferenceHandler:
    """Callable wrapper around a Stable Diffusion XL pipeline.

    The pipeline weights are loaded from the directory containing this
    script. Instances are called with a dict holding a ``"prompt"`` (and
    optionally a ``"negative_prompt"``) and return the generated image as a
    base64-encoded PNG.
    """

    # Sampling settings applied to every request.
    NUM_INFERENCE_STEPS = 30
    GUIDANCE_SCALE = 7.5

    def __init__(self):
        """Load the pipeline onto the best available device."""
        has_cuda = torch.cuda.is_available()
        self.device = "cuda" if has_cuda else "cpu"

        # Half precision is only used on the GPU; when falling back to the
        # CPU the weights are loaded in float32 so inference can actually run.
        dtype = torch.float16 if has_cuda else torch.float32

        # The model files are expected to live next to this script.
        model_dir = os.path.dirname(os.path.abspath(__file__))
        print("Loading model from directory:", model_dir)

        # NOTE(review): newer diffusers releases appear to prefer `token`
        # over `use_auth_token` -- confirm against the pinned version.
        self.pipe = StableDiffusionXLPipeline.from_pretrained(
            model_dir,
            torch_dtype=dtype,
            use_safetensors=True,
            use_auth_token=os.getenv("HUGGINGFACE_TOKEN"),
        ).to(self.device)

        # Replace the default scheduler with DDIM, reusing the existing config.
        self.pipe.scheduler = DDIMScheduler.from_config(
            self.pipe.scheduler.config
        )

    def __call__(self, inputs):
        """Generate one image for the request and return it base64-encoded.

        Args:
            inputs: Mapping with a required, non-empty ``"prompt"`` entry and
                an optional ``"negative_prompt"`` entry.

        Returns:
            A dict ``{"image_base64": <str>}`` holding the PNG image encoded
            as base64 text.

        Raises:
            ValueError: If no prompt (or an empty prompt) is supplied.
        """
        prompt = inputs.get("prompt", "")
        if not prompt:
            raise ValueError("A prompt must be provided")
        negative_prompt = inputs.get("negative_prompt", "")

        result = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=self.NUM_INFERENCE_STEPS,
            guidance_scale=self.GUIDANCE_SCALE,
        )
        image = result.images[0]

        # Serialize to PNG in memory, then base64-encode for transport.
        with BytesIO() as buffered:
            image.save(buffered, format="PNG")
            image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        return {"image_base64": image_base64}


# Instantiate the handler at import time so the model is loaded once.
handler = InferenceHandler()