{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "exit", "1": "hat", "2": "tired", "3": "stand", "4": "birthday", "5": "curtain", "6": "wine", "7": "ball", "8": "park", "9": "jeep", "10": "snowboard", "11": "snow", "12": "dirt", "13": "lying down", "14": "gray and black", "15": "cat", "16": "3", "17": "soccer ball", "18": "wall", "19": "picnic table", "20": "church", "21": "shelter", "22": "tan", "23": "bricks", "24": "white", "25": "suv", "26": "style", "27": "ice cream", "28": "down", "29": "watching", "30": "on street", "31": "security", "32": "yellow", "33": "africa", "34": "chair", "35": "screen", "36": "stripes", "37": "tower", "38": "5", "39": "gray", "40": "double", "41": "smile", "42": "name tag", "43": "blue and white", "44": "skier", "45": "2000", "46": "cage", "47": "skiing", "48": "yes", "49": "woods", "50": "black", "51": "platform", "52": "sidewalk", "53": "calico", "54": "necklace", "55": "man", "56": "lg", "57": "ground", "58": "queen", "59": "small", "60": "skateboarding", "61": "rack", "62": "clock tower", "63": "snowboarding", "64": "red and yellow", "65": "desert", "66": "0", "67": "tabby", "68": "2013", "69": "canopy", "70": "7:35", "71": "tv", "72": "net", "73": "plastic", "74": "out", "75": "woman", "76": "king", "77": "nothing", "78": "they aren't", "79": "door", "80": "in car", "81": "protection", "82": "bicycles", "83": "green", "84": "6", "85": "2010", "86": "arrow", "87": "giraffe", "88": "low", "89": "orange", "90": "large", "91": "purple", "92": "not there", "93": "human", "94": "window", "95": "don't know", "96": "many", "97": "blonde", "98": "hair", "99": "talking", "100": "doughnut", "101": "crown", "102": "big ben", "103": "fence", "104": "no", "105": "boy", "106": "person", "107": "8", "108": "happy", "109": "sun", "110": "cup", "111": "clock", "112": "lady", "113": "train", "114": "leather", "115": "bicycle", "116": "car", "117": "shadows", "118": "on road", "119": "sky", "120": "shade", "121": "unknown", "122": "curtains", "123": "shrimp", "124": "7:45", "125": "solid", "126": "little girl", "127": "station", "128": "trees", "129": "fashion", "130": "clear", "131": "natural", "132": "snowboarder", "133": "hawaii", "134": "backpack", "135": "monitor", "136": "9:35", "137": "can't tell", "138": "blue", "139": "red and blue", "140": "2", "141": "windows", "142": "girl", "143": "full", "144": "dog", "145": "zoo", "146": "women", "147": "donut", "148": "cross", "149": "plain", "150": "bedroom", "151": "crossing", "152": "8:35", "153": "sleeping", "154": "wedding", "155": "10", "156": "bike rack", "157": "right", "158": "white and blue", "159": "wine tasting", "160": "street", "161": "cloudy", "162": "7", "163": "resting", "164": "tent", "165": "brick", "166": "at table", "167": "chopsticks", "168": "photographer", "169": "smiling", "170": "bikes", "171": "giraffes", "172": "red", "173": "not sure", "174": "white and black", "175": "1", "176": "table", "177": "shadow", "178": "skateboard", "179": "soccer", "180": "bus", "181": "black and white", "182": "air", "183": "talking on phone", "184": "walking", "185": "pink", "186": "french", "187": "camera", "188": "forest", "189": "lanyard", "190": "beige", "191": "neon", "192": "beagle", "193": "outside", "194": "brown", "195": "4", "196": "roof", "197": "plate", "198": "laying down" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 66, "1": 175, "10": 155, "2": 140, "2000": 45, "2010": 85, "2013": 68, "3": 16, "4": 195, "5": 38, "6": 84, "7": 162, "7:35": 70, "7:45": 124, "8": 107, "8:35": 152, "9:35": 136, "africa": 33, "air": 182, "arrow": 86, "at table": 166, "backpack": 134, "ball": 7, "beagle": 192, "bedroom": 150, "beige": 190, "bicycle": 115, "bicycles": 82, "big ben": 102, "bike rack": 156, "bikes": 170, "birthday": 4, "black": 50, "black and white": 181, "blonde": 97, "blue": 138, "blue and white": 43, "boy": 105, "brick": 165, "bricks": 23, "brown": 194, "bus": 180, "cage": 46, "calico": 53, "camera": 187, "can't tell": 137, "canopy": 69, "car": 116, "cat": 15, "chair": 34, "chopsticks": 167, "church": 20, "clear": 130, "clock": 111, "clock tower": 62, "cloudy": 161, "cross": 148, "crossing": 151, "crown": 101, "cup": 110, "curtain": 5, "curtains": 122, "desert": 65, "dirt": 12, "dog": 144, "don't know": 95, "donut": 147, "door": 79, "double": 40, "doughnut": 100, "down": 28, "exit": 0, "fashion": 129, "fence": 103, "forest": 188, "french": 186, "full": 143, "giraffe": 87, "giraffes": 171, "girl": 142, "gray": 39, "gray and black": 14, "green": 83, "ground": 57, "hair": 98, "happy": 108, "hat": 1, "hawaii": 133, "human": 93, "ice cream": 27, "in car": 80, "jeep": 9, "king": 76, "lady": 112, "lanyard": 189, "large": 90, "laying down": 198, "leather": 114, "lg": 56, "little girl": 126, "low": 88, "lying down": 13, "man": 55, "many": 96, "monitor": 135, "name tag": 42, "natural": 131, "necklace": 54, "neon": 191, "net": 72, "no": 104, "not sure": 173, "not there": 92, "nothing": 77, "on road": 118, "on street": 30, "orange": 89, "out": 74, "outside": 193, "park": 8, "person": 106, "photographer": 168, "picnic table": 19, "pink": 185, "plain": 149, "plastic": 73, "plate": 197, "platform": 51, "protection": 81, "purple": 91, "queen": 58, "rack": 61, "red": 172, "red and blue": 139, "red and yellow": 64, "resting": 163, "right": 157, "roof": 196, "screen": 35, "security": 31, "shade": 120, "shadow": 177, "shadows": 117, "shelter": 21, "shrimp": 123, "sidewalk": 52, "skateboard": 178, "skateboarding": 60, "skier": 44, "skiing": 47, "sky": 119, "sleeping": 153, "small": 59, "smile": 41, "smiling": 169, "snow": 11, "snowboard": 10, "snowboarder": 132, "snowboarding": 63, "soccer": 179, "soccer ball": 17, "solid": 125, "stand": 3, "station": 127, "street": 160, "stripes": 36, "style": 26, "sun": 109, "suv": 25, "tabby": 67, "table": 176, "talking": 99, "talking on phone": 183, "tan": 22, "tent": 164, "they aren't": 78, "tired": 2, "tower": 37, "train": 113, "trees": 128, "tv": 71, "unknown": 121, "walking": 184, "wall": 18, "watching": 29, "wedding": 154, "white": 24, "white and black": 174, "white and blue": 158, "window": 94, "windows": 141, "wine": 6, "wine tasting": 159, "woman": 75, "women": 146, "woods": 49, "yellow": 32, "yes": 48, "zoo": 145 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.33.1", "type_vocab_size": 2, "vocab_size": 30522 }