OrionZheng committed on
Commit 96f1431
1 Parent(s): b4d14fc

Update config.json

Files changed (1)
  1. config.json +51 -0
config.json CHANGED
@@ -47,4 +47,55 @@
   "transformers_version": "4.34.0",
   "use_cache": true,
   "vocab_size": 256384
 }
+
+{
+  "architectures": [
+    "OpenMoeForCausalLM"
+  ],
+  "auto_map": {
+    "AutoModelForCausalLM": "modeling_openmoe.OpenMoeForCausalLM"
+  },
+  "attention_bias": false,
+  "bos_token_id": 2,
+  "dropout_rate": 0.0,
+  "enable_comm_overlap": false,
+  "enable_hierarchical_alltoall": false,
+  "enable_kernel": false,
+  "enable_load_balance": false,
+  "eos_token_id": 1,
+  "expert_parallel": null,
+  "head_dim": 64,
+  "hidden_act": "swiglu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_norm_epsilon": 1e-06,
+  "load_balance_beam_width": 8,
+  "load_balance_group_swap_factor": 0.4,
+  "load_balance_tolerance": 0.1,
+  "max_position_embeddings": 2048,
+  "mlp_gated": true,
+  "model_type": "llama",
+  "moe_layer_interval": 4,
+  "num_attention_heads": 12,
+  "num_experts": 16,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 12,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "router_capacity_factor_eval": 2.0,
+  "router_capacity_factor_train": 1.25,
+  "router_drop_tks": true,
+  "router_min_capacity": 4,
+  "router_noisy_policy": null,
+  "router_topk": 2,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.34.0",
+  "use_cache": true,
+  "vocab_size": 256384
+}
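
The appended object describes a small OpenMoE decoder: 12 layers, hidden size 768, 16 experts with top-2 routing, and an MoE block at a 4-layer interval. Below is a minimal sketch of how one might read those fields, assuming the new object is saved on its own as config.json and that moe_layer_interval counts decoder layers 1-based (both are assumptions, not stated on this page):

import json

# Read the configuration added in this commit (the path is a placeholder).
with open("config.json") as f:
    cfg = json.load(f)

# Dense dimensions: 12 layers, hidden size 768 = 12 heads x head_dim 64.
print(cfg["num_hidden_layers"], cfg["hidden_size"],
      cfg["num_attention_heads"], cfg["head_dim"])

# MoE routing: 16 experts, top-2 routing, an expert block every 4th layer.
print(cfg["num_experts"], cfg["router_topk"], cfg["moe_layer_interval"])

# Assuming 1-based counting, layers 4, 8, and 12 would carry the expert MLPs.
moe_layers = [i for i in range(1, cfg["num_hidden_layers"] + 1)
              if i % cfg["moe_layer_interval"] == 0]
print(moe_layers)  # [4, 8, 12]

Since auto_map routes AutoModelForCausalLM to modeling_openmoe.OpenMoeForCausalLM, loading this checkpoint through transformers would presumably require trust_remote_code=True so the custom model class shipped with the repository can be imported.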