File size: 1,310 Bytes
5cc155f ec51200 5cc155f ec51200 5cc155f ec51200 5cc155f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
{
"E": 2560,
"L": 32,
"T": 2048,
"_mup": true,
"alt_pw_klass": {
"type": ""
},
"alt_rel_klass": {
"fused": true,
"type": "alibi"
},
"alt_sa_klass": {
"triton": true,
"type": "flash",
"use_rotary_emb": null
},
"architectures": [
"CodifyForCausalLM"
],
"attn_a_reach": 2048,
"attn_b_reach": 2048,
"attn_heads": 40,
"attn_ra_nbasis": 64,
"attn_seq": [
"d"
],
"attn_sparse_layout_seq": null,
"auto_map": {
"AutoConfig": "configuration_codify.CodifyConfig",
"AutoModel": "modeling_codify.CodifyForCausalLM"
},
"backcheck_pw": "inside",
"backcheck_sa": "none",
"bos_token_id": 1,
"dtype_acts": "torch.float16",
"dtype_weights": "torch.float16",
"enc_name": "openai_programming_v2",
"eos_token_id": 2,
"init_scale": 1,
"initializer_range": 0.02,
"layer_norm_epsilon": 1e-05,
"mlp_mult": 4,
"model_type": "codify",
"moe": null,
"mup_optimal_lr": 0.0005,
"mup_shapes_file": "lean_former/mup/alibi_32l/shapes.json",
"posemb": false,
"rescale_embeddings": false,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.24.0",
"tune": [
3,
3,
3,
3
],
"unembedding_shared": false,
"use_cache": true,
"use_res_scale": false,
"vocab_size": 51305
}
|