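# Optimus text VAE: a BERT-based encoder and a GPT-2-based decoder joined by a
# 768-d latent space (the Optimus architecture of Li et al., 2020).
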
optimus:
  symbol: optimus
  find_unused_parameters: false
  args: {}
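
# Shared base entry, extended by every section below via `super_cfg`.
# Field conventions, as suggested by the keys themselves:
#   type : registry key that the loader presumably maps to a model class;
#   pth  : optional checkpoint path (the per-component `pth` lines below are
#          commented out, so weights presumably come from the combined
#          optimus_vae checkpoint instead);
#   find_unused_parameters : most likely forwarded to DistributedDataParallel.
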
optimus_bert_encoder:
  super_cfg: optimus
  type: optimus_bert_connector
  # pth: pretrained/optimus_bert_encoder.pth
  args:
    config:
      architectures:
        - BertForMaskedLM
      attention_probs_dropout_prob: 0.1
      finetuning_task: null
      hidden_act: gelu
      hidden_dropout_prob: 0.1
      hidden_size: 768
      initializer_range: 0.02
      intermediate_size: 3072
      layer_norm_eps: 1.e-12
      max_position_embeddings: 512
      num_attention_heads: 12
      num_hidden_layers: 12
      num_labels: 2
      output_attentions: false
      output_hidden_states: false
      pruned_heads: {}
      torchscript: false
      type_vocab_size: 2
      vocab_size: 28996
    latent_size: 768
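
# Encoder-side tokenizer: cased BERT WordPiece. do_lower_case: false is
# consistent with the encoder's vocab_size of 28996 (the bert-base-cased
# vocabulary).
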
optimus_bert_tokenizer:
  super_cfg: optimus
  type: optimus_bert_tokenizer
  args:
    do_lower_case: false
    max_len: 512
    vocab_file: configs/vocab/bert-base-cased-vocab.txt
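
# Decoder: GPT-2 base (12 layers, 768 hidden). The non-standard `latent_size`
# config field presumably lets the connector inject the 768-d latent into the
# decoder; vocab_size is 50260 rather than stock GPT-2's 50257, presumably to
# accommodate the extra special tokens Optimus adds. The S3 URLs in
# pretrained_config_archive_map are legacy Hugging Face config locations from
# older transformers releases.
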
optimus_gpt2_decoder:
  super_cfg: optimus
  type: optimus_gpt2_connector
  # pth: pretrained/optimus_gpt2_decoder.pth
  args:
    config:
      architectures:
        - GPT2LMHeadModel
      attn_pdrop: 0.1
      embd_pdrop: 0.1
      finetuning_task: null
      hidden_size: 768
      initializer_range: 0.02
      latent_size: 768
      layer_norm_epsilon: 1.e-05
      max_position_embeddings: 1024
      n_ctx: 1024
      n_embd: 768
      n_head: 12
      n_layer: 12
      n_positions: 1024
      num_attention_heads: 12
      num_hidden_layers: 12
      num_labels: 1
      output_attentions: false
      output_hidden_states: false
      pretrained_config_archive_map:
        gpt2: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json
        gpt2-medium: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-config.json
        gpt2-large: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-config.json
      pruned_heads: {}
      resid_pdrop: 0.1
      summary_activation: null
      summary_first_dropout: 0.1
      summary_proj_to_labels: true
      summary_type: cls_index
      summary_use_proj: true
      torchscript: false
      vocab_size: 50260
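
# Decoder-side tokenizer: GPT-2 byte-pair encoding, which needs both a vocab
# file and a merges file.
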
optimus_gpt2_tokenizer:
  super_cfg: optimus
  type: optimus_gpt2_tokenizer
  args:
    do_lower_case: false
    max_len: 1024
    vocab_file: configs/vocab/gpt2-vocab.json
    merges_file: configs/vocab/gpt2-merges.txt
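
# Top-level entry combining the four components. MODEL(<name>) references are
# presumably resolved by the loader into instances of the entries defined
# above, so the whole VAE is described by this one file.
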
optimus_vae:
  super_cfg: optimus
  type: optimus_vae
  pth: pretrained/optimus-vae.pth
  args:
    encoder: MODEL(optimus_bert_encoder)
    decoder: MODEL(optimus_gpt2_decoder)
    tokenizer_encoder: MODEL(optimus_bert_tokenizer)
    tokenizer_decoder: MODEL(optimus_gpt2_tokenizer)
    args:
      latent_size: 768
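
# A minimal sketch of how a registry-style loader might consume this file
# (hypothetical `get_model` helper and path; the real loader ships with the
# surrounding codebase):
#
#   import yaml
#   cfg = yaml.safe_load(open('configs/model/optimus.yaml'))
#   vae = get_model()(cfg['optimus_vae'])  # resolves MODEL(...) recursively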