ahatamiz committed
Commit
dcd7b4c
1 Parent(s): e760ef2

Upload model

Files changed (3)
  1. config.json +2 -3
  2. model.safetensors +3 -0
  3. modeling_mambavision.py +7 -7
config.json CHANGED
@@ -1,11 +1,10 @@
 {
   "architectures": [
-    "MambaVisionModelForImageClassification"
+    "MambaVisionModel"
   ],
   "auto_map": {
     "AutoConfig": "configuration_mambavision.MambaVisionConfig",
-    "AutoModel": "modeling_mambavision.MambaVisionModel",
-    "AutoModelForImageClassification": "modeling_mambavision.MambaVisionModelForImageClassification"
+    "AutoModel": "modeling_mambavision.MambaVisionModel"
   },
   "depths": [
     3,
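
With this change the checkpoint is exposed only as a plain feature backbone: the classification-head entries are dropped from both "architectures" and "auto_map". A minimal loading sketch, assuming a hypothetical repo ID such as nvidia/MambaVision-T-1K (this commit does not name the repository):

    from transformers import AutoModel

    # trust_remote_code=True lets transformers import configuration_mambavision
    # and modeling_mambavision from the model repo, as wired up in auto_map.
    model = AutoModel.from_pretrained(
        "nvidia/MambaVision-T-1K",  # hypothetical repo ID, for illustration
        trust_remote_code=True,
    )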
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccb5ad877adfa27edfd3bc4d5d8f0b3fd9fdd9be1ec88fa25c1577471ce8201f
+size 390807536
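
The three lines added here are a Git LFS pointer, not the weights themselves: the repository tracks only the spec version, the SHA-256 of the real file, and its size (about 391 MB), while the binary lives in LFS storage. Once the real file has been pulled, it can be inspected with the safetensors library; a minimal sketch, assuming the download is saved locally as model.safetensors:

    from safetensors.torch import load_file

    # Loads the actual weights file (running this on the LFS pointer would fail).
    state_dict = load_file("model.safetensors")
    total = sum(t.numel() for t in state_dict.values())
    print(f"{len(state_dict)} tensors, {total:,} parameters")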
modeling_mambavision.py CHANGED
@@ -28,7 +28,7 @@ from einops import rearrange, repeat
 
 from transformers import PreTrainedModel
 
-from .configuration_mambavision import MambaVisionConfig
+from configuration_mambavision import MambaVisionConfig
 
 
 def _cfg(url='', **kwargs):
@@ -280,7 +280,7 @@ class ConvBlock(nn.Module):
         self.norm2 = nn.BatchNorm2d(dim, eps=1e-5)
         self.layer_scale = layer_scale
         if layer_scale is not None and type(layer_scale) in [int, float]:
-            self.gamma = nn.Parameter(layer_scale * torch.ones(dim))
+            self.g = nn.Parameter(layer_scale * torch.ones(dim))
             self.layer_scale = True
         else:
             self.layer_scale = False
@@ -294,7 +294,7 @@ class ConvBlock(nn.Module):
         x = self.conv2(x)
         x = self.norm2(x)
         if self.layer_scale:
-            x = x * self.gamma.view(1, -1, 1, 1)
+            x = x * self.g.view(1, -1, 1, 1)
         x = input + self.drop_path(x)
         return x
@@ -502,12 +502,12 @@ class Block(nn.Module):
         mlp_hidden_dim = int(dim * mlp_ratio)
         self.mlp = Mlp_block(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
         use_layer_scale = layer_scale is not None and type(layer_scale) in [int, float]
-        self.gamma_1 = nn.Parameter(layer_scale * torch.ones(dim)) if use_layer_scale else 1
-        self.gamma_2 = nn.Parameter(layer_scale * torch.ones(dim)) if use_layer_scale else 1
+        self.g_1 = nn.Parameter(layer_scale * torch.ones(dim)) if use_layer_scale else 1
+        self.g_2 = nn.Parameter(layer_scale * torch.ones(dim)) if use_layer_scale else 1
 
     def forward(self, x):
-        x = x + self.drop_path(self.gamma_1 * self.mixer(self.norm1(x)))
-        x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+        x = x + self.drop_path(self.g_1 * self.mixer(self.norm1(x)))
+        x = x + self.drop_path(self.g_2 * self.mlp(self.norm2(x)))
         return x
 
 
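
The gamma → g rename (and gamma_1/gamma_2 → g_1/g_2) is more than cosmetic: when loading a state dict, transformers has historically rewritten parameter keys containing "gamma" to "weight" (and "beta" to "bias") to support old TensorFlow-converted checkpoints, which silently drops layer-scale parameters named gamma. A minimal sketch of the layer-scale pattern under the safer name; the stand-alone LayerScale class below is illustrative, the model itself applies the scale inline as shown in the diff:

    import torch
    import torch.nn as nn

    class LayerScale(nn.Module):
        """Learnable per-channel scale, initialised to a small constant."""

        def __init__(self, dim: int, init_value: float = 1e-5):
            super().__init__()
            # Named `g` rather than `gamma`: transformers' loader may rewrite
            # keys containing "gamma", so the parameter would never be restored.
            self.g = nn.Parameter(init_value * torch.ones(dim))

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self.g * x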