{
  "E": 2560,
  "L": 32,
  "T": 2048,
  "_mup": true,
  "alt_pw_klass": {
    "type": ""
  },
  "alt_rel_klass": {
    "fused": true,
    "type": "alibi"
  },
  "alt_sa_klass": {
    "triton": true,
    "type": "flash",
    "use_rotary_emb": null
  },
  "architectures": [
    "CodifyForCausalLM"
  ],
  "attn_a_reach": 2048,
  "attn_b_reach": 2048,
  "attn_heads": 40,
  "attn_ra_nbasis": 64,
  "attn_seq": [
    "d"
  ],
  "attn_sparse_layout_seq": null,
  "auto_map": {
    "AutoConfig": "configuration_codify.CodifyConfig",
    "AutoModel": "modeling_codify.CodifyForCausalLM"
  },
  "backcheck_pw": "inside",
  "backcheck_sa": "none",
  "bos_token_id": 1,
  "dtype_acts": "torch.float16",
  "dtype_weights": "torch.float16",
  "enc_name": "openai_programming_v2",
  "eos_token_id": 2,
  "init_scale": 1,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "mlp_mult": 4,
  "model_type": "codify",
  "moe": null,
  "mup_optimal_lr": 0.0005,
  "mup_shapes_file": "lean_former/mup/alibi_32l/shapes.json",
  "posemb": false,
  "rescale_embeddings": false,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.24.0",
  "tune": [
    3,
    3,
    3,
    3
  ],
  "unembedding_shared": false,
  "use_cache": true,
  "use_res_scale": false,
  "vocab_size": 51305
}