Upload model
- config.json +1 -1
- configuration_uniformer.py +51 -0
- modelling_uniformer.py +4 -3
config.json
CHANGED
@@ -50,5 +50,5 @@
   "qkv_bias": true,
   "representation_size": null,
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.39.3"
 }
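Note: the only change here is pinning transformers_version to 4.39.3. This field is written automatically whenever a config is serialized, so it just records the library version that saved the file; a minimal sketch of where it comes from (the output directory name is a placeholder):

from transformers import PretrainedConfig

# save_pretrained writes config.json and stamps it with the installed
# transformers version, e.g. "transformers_version": "4.39.3".
cfg = PretrainedConfig(qkv_bias=True)
cfg.save_pretrained("out")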
configuration_uniformer.py
ADDED
@@ -0,0 +1,51 @@
+from transformers import PretrainedConfig
+from transformers.utils import logging
+
+logger = logging.get_logger(__name__)
+
+
+class UniFormerWithProjectionHeadConfig(PretrainedConfig):
+
+    model_type = 'uniformer'
+
+    def __init__(
+        self,
+        projection_size=None,
+        embed_dim=[64, 128, 320, 512],
+        image_size=384,
+        in_chans=3,
+        depth=[5, 8, 20, 7],
+        patch_size=[4, 2, 2, 2],
+        head_dim=64,
+        mlp_ratio=4,
+        qkv_bias=True,
+        num_classes=1000,
+        qk_scale=None,
+        representation_size=None,
+        drop_rate=0.0,
+        drop_path_rate=0.3,
+        attn_drop_rate=0.0,
+        conv_stem=False,
+        layer_norm_eps=1e-6,
+        **kwargs,
+    ):
+        super().__init__(
+            layer_norm_eps=layer_norm_eps,
+            image_size=image_size,
+            qkv_bias=qkv_bias,
+            **kwargs,
+        )
+        self.projection_size = projection_size
+        self.embed_dim = embed_dim
+        self.in_chans = in_chans
+        self.depth = depth
+        self.patch_size = patch_size
+        self.head_dim = head_dim
+        self.mlp_ratio = mlp_ratio
+        self.num_classes = num_classes
+        self.qk_scale = qk_scale
+        self.representation_size = representation_size
+        self.drop_rate = drop_rate
+        self.drop_path_rate = drop_path_rate
+        self.attn_drop_rate = attn_drop_rate
+        self.conv_stem = conv_stem
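Note: the new file defines the config class for the custom UniFormer model with a projection head. As a quick sanity check, it round-trips through save_pretrained/from_pretrained like any PretrainedConfig; a minimal sketch, assuming the file is importable locally (the directory name is a placeholder):

from configuration_uniformer import UniFormerWithProjectionHeadConfig

# Instantiate with the defaults from the diff above, overriding one field.
config = UniFormerWithProjectionHeadConfig(projection_size=128)
config.save_pretrained("uniformer-config")  # writes config.json with model_type "uniformer"
reloaded = UniFormerWithProjectionHeadConfig.from_pretrained("uniformer-config")
assert reloaded.projection_size == 128
assert reloaded.depth == [5, 8, 20, 7]

One design note: embed_dim, depth, and patch_size use mutable list defaults; since the class only stores them this is harmless, but copying them (e.g. list(embed_dim)) in __init__ would be slightly safer.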
modelling_uniformer.py
CHANGED
@@ -1,7 +1,7 @@
 from collections import OrderedDict
 from functools import partial
-from typing import Optional, Tuple, Union
 from math import isqrt
+from typing import Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
@@ -11,6 +11,8 @@ from transformers.modeling_outputs import ModelOutput
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import logging
 
+from .configuration_uniformer import UniFormerWithProjectionHeadConfig
+
 logger = logging.get_logger(__name__)
 
 
@@ -293,8 +295,7 @@ class UniFormerPreTrainedModel(PreTrainedModel):
     models.
     """
 
-    config_class =
-    base_model_prefix = "vit"
+    config_class = UniFormerWithProjectionHeadConfig
     main_input_name = "pixel_values"
 
     def _init_weights(self, m):
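Note: setting config_class ties UniFormerPreTrainedModel to the new config, and the relative import works because both files ship in the same Hub repo; the dropped base_model_prefix = "vit" looks like a leftover from a ViT template. Loading then goes through the custom-code path; a sketch, assuming the repo's config.json carries the matching auto_map entries (the repo id is a placeholder):

from transformers import AutoConfig, AutoModel

repo = "user/uniformer-projection-head"  # hypothetical repo id

# trust_remote_code=True lets transformers fetch and import
# configuration_uniformer.py and modelling_uniformer.py from the repo.
config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
model = AutoModel.from_pretrained(repo, trust_remote_code=True)

When pushing, calling UniFormerWithProjectionHeadConfig.register_for_auto_class() (and register_for_auto_class("AutoModel") on the model class) is what writes those auto_map entries into config.json.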