onboard pruned state dict, remove tokenizer, remove inference api, update readme
- README.md +15 -4
- bert_layers.py +1 -1
- config.json +3 -3
- pytorch_model.bin +2 -2
- special_tokens_map.json +0 -7
- tokenizer.json +0 -0
- tokenizer_config.json +0 -13
- vocab.txt +0 -0
README.md
CHANGED
@@ -3,6 +3,7 @@ license: apache-2.0
 language:
 - en
 pipeline_tag: fill-mask
+inference: false
 ---
 
 # Monarch Mixer-BERT
@@ -14,17 +15,27 @@ Check out our [GitHub](https://github.com/HazyResearch/m2/tree/main) for instruc
 
 ## How to use
 
-
+You can load this model using Hugging Face `AutoModel`:
 ```python
 from transformers import AutoModelForMaskedLM
 mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
 ```
 
+This model uses the Hugging Face `bert-base-uncased` tokenizer:
+```python
+from transformers import BertTokenizer
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+```
+
 You can use this model with a pipeline for masked language modeling:
 ```python
-from transformers import pipeline
-
-
+from transformers import AutoModelForMaskedLM, BertTokenizer, pipeline
+
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m', trust_remote_code=True)
+
+unmasker = pipeline('fill-mask', model=mlm, tokenizer=tokenizer)
+unmasker('Every morning, I enjoy a cup of [MASK] to start my day.')
 ```
 
 ### Remote Code
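The updated pipeline example returns a ranked list of predictions. Below is a minimal sketch of inspecting that output, assuming the model downloads and runs locally; the printed fields are the standard `fill-mask` pipeline keys, and no actual scores are claimed here.

```python
# Sketch: inspect the fill-mask pipeline output described in the new README.
from transformers import AutoModelForMaskedLM, BertTokenizer, pipeline

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m',
                                           trust_remote_code=True)
unmasker = pipeline('fill-mask', model=mlm, tokenizer=tokenizer)

# Each prediction is a dict with 'score', 'token', 'token_str', and 'sequence'.
for pred in unmasker('Every morning, I enjoy a cup of [MASK] to start my day.'):
    print(f"{pred['token_str']:>10}  {pred['score']:.4f}")
```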
bert_layers.py
CHANGED
@@ -245,6 +245,7 @@ class BertLayer(nn.Module):
             hyena_filter_dropout=config.hyena_filter_dropout,
             hyena_filter_order=config.hyena_filter_order,
             residual_long_conv=config.residual_long_conv,
+            hyena_training_additions=config.hyena_training_additions,
         )
 
         if config.use_glu_mlp:
@@ -887,4 +888,3 @@ class BertForSequenceClassification(BertPreTrainedModel):
             hidden_states=None,
             attentions=None,
         )
-
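The new `hyena_training_additions` keyword implies a matching attribute on the config class. A hypothetical sketch of how such a flag is typically declared in `configuration_bert.BertConfig` so that older configs without the key still load; the default value and structure here are assumptions, not the repo's actual code.

```python
from transformers import PretrainedConfig

class BertConfig(PretrainedConfig):
    # Hypothetical: only the new keyword is shown, other args elided.
    def __init__(self, hyena_training_additions=False, **kwargs):
        super().__init__(**kwargs)
        # Checkpoints whose config.json omits the key fall back to the
        # default, so older checkpoints keep loading without errors.
        self.hyena_training_additions = hyena_training_additions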
config.json
CHANGED
@@ -5,6 +5,7 @@
     "BertForMaskedLM"
   ],
   "attention_probs_dropout_prob": 0.0,
+  "bidirectional": true,
   "auto_map": {
     "AutoConfig": "configuration_bert.BertConfig",
     "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM"
@@ -27,7 +28,6 @@
   "transformers_version": "4.28.1",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 30522,
   "long_conv_l_max": 128,
   "long_conv_kernel_learning_rate": 1e-3,
   "hyena_lr_pos_emb": 1e-5,
@@ -35,10 +35,10 @@
   "hyena_wd": 0.1,
   "hyena_emb_dim": 5,
   "hyena_filter_order": 128,
-  "bidirectional": true,
   "residual_long_conv": true,
   "use_glu_mlp": true,
   "use_monarch_mlp": true,
   "monarch_mlp_nblocks": 4,
-  "use_positional_encodings" : true
+  "use_positional_encodings" : true,
+  "vocab_size": 30528
 }
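The `vocab_size` bump from 30522 to 30528 pads the embedding table to a multiple of 64 (30528 = 477 × 64), a common trick for tensor-core-friendly matrix shapes; that motivation is an inference from the numbers, not stated in the commit. A quick check that a freshly loaded model picks up the padded size:

```python
from transformers import AutoModelForMaskedLM

mlm = AutoModelForMaskedLM.from_pretrained('alycialee/m2-bert-260m',
                                           trust_remote_code=True)
# 30528 rows in the embedding table, padded up from BERT's usual 30522.
assert mlm.config.vocab_size == 30528
```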
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0155ae9c0b8923f8ea79e768fb70bb1af75af0cd4adb0b166ea288e0d8732117
+size 1036917225
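The pointer's `oid` is the SHA-256 of the full weight file, so a local download can be checked against it directly. A small sketch, assuming `pytorch_model.bin` has been fetched into the working directory:

```python
import hashlib

EXPECTED = '0155ae9c0b8923f8ea79e768fb70bb1af75af0cd4adb0b166ea288e0d8732117'

h = hashlib.sha256()
with open('pytorch_model.bin', 'rb') as f:
    # Stream in 1 MiB chunks so the ~1 GB file never sits fully in memory.
    for chunk in iter(lambda: f.read(1 << 20), b''):
        h.update(chunk)
assert h.hexdigest() == EXPECTED
```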
special_tokens_map.json
DELETED
@@ -1,7 +0,0 @@
-{
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
-}
tokenizer.json
DELETED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
DELETED
@@ -1,13 +0,0 @@
-{
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
-}
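The deleted tokenizer files appear to be stock `bert-base-uncased` settings, which is why the README now points users at that tokenizer instead. A quick sanity check that the replacement reproduces the removed configuration:

```python
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Same values as the deleted tokenizer_config.json / special_tokens_map.json.
assert tokenizer.model_max_length == 512
assert tokenizer.do_lower_case is True
assert tokenizer.mask_token == '[MASK]' and tokenizer.pad_token == '[PAD]'
```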
vocab.txt
DELETED
The diff for this file is too large to render. See raw diff.