alycialee committed
Commit ea996c1
1 Parent(s): fe0a119

onboard pruned state dict, remove tokenizer, remove inference api, update readme

README.md CHANGED
@@ -3,6 +3,7 @@ license: apache-2.0
 language:
 - en
 pipeline_tag: fill-mask
+inference: false
 ---
 
 # Monarch Mixer-BERT
@@ -14,17 +15,27 @@ Check out our [GitHub](https://github.com/HazyResearch/m2/tree/main) for instruc
 
 ## How to use
 
-Using AutoModel:
+You can load this model using Hugging Face `AutoModel`:
 ```python
 from transformers import AutoModelForMaskedLM
 mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
 ```
 
+This model uses the Hugging Face `bert-base-uncased` tokenizer:
+```python
+from transformers import BertTokenizer
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+```
+
 You can use this model with a pipeline for masked language modeling:
 ```python
-from transformers import pipeline
-unmasker = pipeline('fill-mask', model='alycialee/m2-bert-260m', trust_remote_code=True)
-unmasker("Every morning, I enjoy a cup of [MASK] to start my day.")
+from transformers import AutoModelForMaskedLM, BertTokenizer, pipeline
+
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
+
+unmasker = pipeline('fill-mask', model=mlm, tokenizer=tokenizer)
+unmasker('Every morning, I enjoy a cup of [MASK] to start my day.')
 ```
 
 ### Remote Code
bert_layers.py CHANGED
@@ -245,6 +245,7 @@ class BertLayer(nn.Module):
 hyena_filter_dropout=config.hyena_filter_dropout,
 hyena_filter_order=config.hyena_filter_order,
 residual_long_conv=config.residual_long_conv,
+hyena_training_additions=config.hyena_training_additions,
 )
 
 if config.use_glu_mlp:
@@ -887,4 +888,3 @@ class BertForSequenceClassification(BertPreTrainedModel):
 hidden_states=None,
 attentions=None,
 )
-
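The new `hyena_training_additions` argument is forwarded to the mixer from the model config inside `BertLayer`. Below is a rough sketch of how a user could inspect or override this flag when loading the model, assuming the repository's custom `BertConfig` supplies a default for it; the `False` value is purely illustrative:

```python
from transformers import AutoConfig, AutoModelForMaskedLM

# Inspect the config shipped with the repo (requires trust_remote_code for the custom classes).
config = AutoConfig.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
print(getattr(config, 'hyena_training_additions', 'not defined'))

# Override the flag (illustrative value) and reload the model with the modified config.
config.hyena_training_additions = False
mlm = AutoModelForMaskedLM.from_pretrained(
    'alycialee/m2-bert-260m', config=config, trust_remote_code=True
)
```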
config.json CHANGED
@@ -5,6 +5,7 @@
 "BertForMaskedLM"
 ],
 "attention_probs_dropout_prob": 0.0,
+"bidirectional": true,
 "auto_map": {
 "AutoConfig": "configuration_bert.BertConfig",
 "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM"
@@ -27,7 +28,6 @@
 "transformers_version": "4.28.1",
 "type_vocab_size": 2,
 "use_cache": true,
-"vocab_size": 30522,
 "long_conv_l_max": 128,
 "long_conv_kernel_learning_rate": 1e-3,
 "hyena_lr_pos_emb": 1e-5,
@@ -35,10 +35,10 @@
 "hyena_wd": 0.1,
 "hyena_emb_dim": 5,
 "hyena_filter_order": 128,
-"bidirectional": true,
 "residual_long_conv": true,
 "use_glu_mlp": true,
 "use_monarch_mlp": true,
 "monarch_mlp_nblocks": 4,
-"use_positional_encodings" : true
+"use_positional_encodings" : true,
+"vocab_size": 30528
 }
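A quick way to sanity-check the reworked config against the external tokenizer is sketched below; the new `vocab_size` of 30528 is larger than the 30522 entries in `bert-base-uncased`, presumably padded for efficiency, though the commit does not say so:

```python
from transformers import AutoConfig, BertTokenizer

config = AutoConfig.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

print(config.vocab_size, len(tokenizer))  # 30528 vs. 30522
print(config.bidirectional)               # True, now listed with the other top-level keys
assert config.vocab_size >= len(tokenizer)
```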
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9c7f227d6e7b073df86573a94cef19a6ae1a2bc27269c8a9445db67a5c061cf
-size 956301312
+oid sha256:0155ae9c0b8923f8ea79e768fb70bb1af75af0cd4adb0b166ea288e0d8732117
+size 1036917225
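The LFS pointer swap replaces the checkpoint with the onboarded state dict (1,036,917,225 bytes, up from 956,301,312). A hedged sketch for downloading and inspecting it locally follows; the exact key names and tensor counts depend on the repository's state-dict layout:

```python
import torch
from huggingface_hub import hf_hub_download

# Fetch the raw checkpoint behind the LFS pointer and peek at its contents.
path = hf_hub_download('alycialee/m2-bert-260m', 'pytorch_model.bin')
state_dict = torch.load(path, map_location='cpu')

print(f'{len(state_dict)} tensors in the state dict')
total_params = sum(t.numel() for t in state_dict.values())
print(f'~{total_params / 1e6:.0f}M parameters')
```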
special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
-{
-"cls_token": "[CLS]",
-"mask_token": "[MASK]",
-"pad_token": "[PAD]",
-"sep_token": "[SEP]",
-"unk_token": "[UNK]"
-}
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json DELETED
@@ -1,13 +0,0 @@
-{
-"clean_up_tokenization_spaces": true,
-"cls_token": "[CLS]",
-"do_lower_case": true,
-"mask_token": "[MASK]",
-"model_max_length": 512,
-"pad_token": "[PAD]",
-"sep_token": "[SEP]",
-"strip_accents": null,
-"tokenize_chinese_chars": true,
-"tokenizer_class": "BertTokenizer",
-"unk_token": "[UNK]"
-}
vocab.txt DELETED
The diff for this file is too large to render. See raw diff
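With `special_tokens_map.json`, `tokenizer.json`, `tokenizer_config.json`, and `vocab.txt` removed, the repository no longer ships its own tokenizer; per the updated README, the standard BERT tokenizer is loaded separately. A minimal sketch, where the printed values mirror the settings from the deleted `tokenizer_config.json`:

```python
from transformers import BertTokenizer

# The model repo no longer carries tokenizer files, so pull the tokenizer from bert-base-uncased.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
print(tokenizer.mask_token, tokenizer.model_max_length)  # [MASK] 512
```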