import torch

# Configuration parameters
#
# Each constant sits on its own line: anything following a `#` on the same
# physical line is a comment, so these assignments must not share a line with
# the section header or they are never executed.

# Model selection.
MODEL_TYPE = "i_blip"  # presumably an InstructBLIP-style captioner - confirm
MODEL_PATH = "m7mdal7aj/captioner"

# Image preprocessing limit (units not shown here; presumably pixels - confirm).
MAX_IMAGE_SIZE = 1024

# Generation length settings (presumably token counts - confirm with consumer).
MIN_LENGTH = 150
MAX_NEW_TOKENS = 400

# Model loading options. Only one quantization flag is enabled here.
LOAD_IN_8BIT = False
LOAD_IN_4BIT = True
TORCH_DTYPE = torch.float16
DEVICE_MAP = "auto"
LOW_CPU_MEM_USAGE = True

# Decoding option.
SKIP_SPECIAL_TOKENS = True

# Instruction text for the captioner. Adjacent literals are concatenated at
# compile time, so the value is one single-line string with no newlines.
PROMPT = (
    "Provide a comprehensive and detailed description of the following "
    "image. Focus on identifying and describing every element in the "
    "scene, including all people (men, women, boys, girls, kids, babies) "
    "and all objects, their count, their positions, and any actions or "
    "interactions taking place. Pay special attention to the positioning "
    "of limbs and hands, and any objects they might be holding or "
    "interacting with. Describe colors, textures, setting, atmosphere, "
    "mood, and any indicators of the time of day, such as the quality of "
    "light, shadows. Ensure to capture both the obvious and subtle "
    "elements for a complete understanding of the image."
)