Transformers
mobilevit
Inference Endpoints
Jingya's picture
Jingya HF staff
Upload config.json with huggingface_hub
64675ce verified
{
"_name_or_path": "apple/deeplabv3-mobilevit-small",
"architectures": [
"MobileViTForSemanticSegmentation"
],
"aspp_dropout_prob": 0.1,
"aspp_out_channels": 256,
"atrous_rates": [
6,
12,
18
],
"attention_probs_dropout_prob": 0.0,
"classifier_dropout_prob": 0.1,
"conv_kernel_size": 3,
"expand_ratio": 4.0,
"hidden_act": "silu",
"hidden_dropout_prob": 0.1,
"hidden_sizes": [
144,
192,
240
],
"id2label": {
"0": "background",
"1": "aeroplane",
"2": "bicycle",
"3": "bird",
"4": "boat",
"5": "bottle",
"6": "bus",
"7": "car",
"8": "cat",
"9": "chair",
"10": "cow",
"11": "diningtable",
"12": "dog",
"13": "horse",
"14": "motorbike",
"15": "person",
"16": "pottedplant",
"17": "sheep",
"18": "sofa",
"19": "train",
"20": "tvmonitor"
},
"image_size": 512,
"initializer_range": 0.02,
"label2id": {
"aeroplane": 1,
"background": 0,
"bicycle": 2,
"bird": 3,
"boat": 4,
"bottle": 5,
"bus": 6,
"car": 7,
"cat": 8,
"chair": 9,
"cow": 10,
"diningtable": 11,
"dog": 12,
"horse": 13,
"motorbike": 14,
"person": 15,
"pottedplant": 16,
"sheep": 17,
"sofa": 18,
"train": 19,
"tvmonitor": 20
},
"layer_norm_eps": 1e-05,
"mlp_ratio": 2.0,
"model_type": "mobilevit",
"neck_hidden_sizes": [
16,
32,
64,
96,
128,
160,
640
],
"neuron": {
"auto_cast": null,
"auto_cast_type": null,
"compiler_type": "neuronx-cc",
"compiler_version": "2.13.66.0+6dfecc895",
"dynamic_batch_size": false,
"inline_weights_to_neff": false,
"input_names": [
"pixel_values"
],
"model_type": "mobilevit",
"optlevel": "2",
"output_attentions": false,
"output_hidden_states": false,
"output_names": [
"logits"
],
"static_batch_size": 1,
"static_image_size": 512,
"static_num_channels": 3,
"static_patch_size": 2
},
"num_attention_heads": 4,
"num_channels": 3,
"output_stride": 16,
"patch_size": 2,
"qkv_bias": true,
"semantic_loss_ignore_index": 255,
"task": "semantic-segmentation",
"torchscript": true,
"transformers_version": "4.40.2"
}