from transformers import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)


class UniFormerWithProjectionHeadConfig(PretrainedConfig):
    """Configuration holder for a UniFormer model with a projection head.

    Every constructor argument is stored on the instance. ``layer_norm_eps``,
    ``image_size`` and ``qkv_bias`` are forwarded to
    ``PretrainedConfig.__init__`` together with ``**kwargs``; all other
    arguments are assigned directly as attributes of the same name.

    NOTE(review): this file does not show how the values are consumed. The
    descriptions below are inferred from the parameter names and should be
    confirmed against the model implementation.

    Args:
        projection_size: Presumably the output width of the projection head;
            defaults to ``None``.
        embed_dim: Presumably the per-stage embedding widths. ``None`` (the
            default) selects ``[64, 128, 320, 512]``.
        image_size: Presumably the input image resolution; defaults to 384.
        in_chans: Presumably the number of input image channels; defaults
            to 3.
        depth: Presumably the number of blocks per stage. ``None`` (the
            default) selects ``[5, 8, 20, 7]``.
        patch_size: Presumably the per-stage patch/downsampling sizes.
            ``None`` (the default) selects ``[4, 2, 2, 2]``.
        head_dim: Defaults to 64.
        mlp_ratio: Defaults to 4.
        qkv_bias: Defaults to ``True``.
        num_classes: Defaults to 1000.
        qk_scale: Defaults to ``None``.
        representation_size: Defaults to ``None``.
        drop_rate: Defaults to 0.0.
        drop_path_rate: Defaults to 0.3.
        attn_drop_rate: Defaults to 0.0.
        conv_stem: Defaults to ``False``.
        layer_norm_eps: Defaults to 1e-6.
        **kwargs: Passed through unchanged to ``PretrainedConfig.__init__``.
    """

    # NOTE(review): registered as 'vit' rather than a UniFormer-specific
    # identifier. Presumably deliberate so that tooling dispatching on
    # model_type treats this config like a ViT -- confirm before changing.
    model_type = 'vit'

    def __init__(
        self,
        projection_size=None,
        embed_dim=None,
        image_size=384,
        in_chans=3,
        depth=None,
        patch_size=None,
        head_dim=64,
        mlp_ratio=4,
        qkv_bias=True,
        num_classes=1000,
        qk_scale=None,
        representation_size=None,
        drop_rate=0.0,
        drop_path_rate=0.3,
        attn_drop_rate=0.0,
        conv_stem=False,
        layer_norm_eps=1e-6,
        **kwargs,
    ):
        # The list-valued defaults are built here, per call, instead of in
        # the signature. A list literal in the signature is created once and
        # would be shared (and mutable) across every default-constructed
        # config instance.
        if embed_dim is None:
            embed_dim = [64, 128, 320, 512]
        if depth is None:
            depth = [5, 8, 20, 7]
        if patch_size is None:
            patch_size = [4, 2, 2, 2]

        # These three values are handed to the base class rather than set
        # here; the base class is expected to keep them as attributes
        # (TODO confirm against the installed transformers version).
        super().__init__(
            layer_norm_eps=layer_norm_eps,
            image_size=image_size,
            qkv_bias=qkv_bias,
            **kwargs,
        )

        self.projection_size = projection_size
        self.embed_dim = embed_dim
        self.in_chans = in_chans
        self.depth = depth
        self.patch_size = patch_size
        self.head_dim = head_dim
        self.mlp_ratio = mlp_ratio
        self.num_classes = num_classes
        self.qk_scale = qk_scale
        self.representation_size = representation_size
        self.drop_rate = drop_rate
        self.drop_path_rate = drop_path_rate
        self.attn_drop_rate = attn_drop_rate
        self.conv_stem = conv_stem