diff --git "a/model.safetensors.index.json" "b/model.safetensors.index.json" new file mode 100644--- /dev/null +++ "b/model.safetensors.index.json" @@ -0,0 +1,1788 @@ +{ + "metadata": { + "total_size": 15162995712 + }, + "weight_map": { + "decoder.block.0.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.1.EncDecAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.1.EncDecAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.1.EncDecAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.1.EncDecAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.2.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.2.mlp.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.0.layer.2.mlp.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.1.EncDecAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.1.EncDecAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.1.EncDecAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.1.EncDecAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_0.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_0.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_1.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_1.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_10.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_10.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_11.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_11.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_12.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_12.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_13.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_13.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_14.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_14.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_15.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_15.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_16.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_16.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_17.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_17.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_18.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_18.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_19.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_19.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_2.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_2.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_20.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_20.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_21.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_21.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_22.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_22.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_23.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_23.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_24.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_24.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_25.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_25.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_26.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_26.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_27.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_27.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_28.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_28.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_29.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_29.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_3.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_3.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_30.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_30.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_31.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_31.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_32.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_32.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_33.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_33.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_34.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_34.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_35.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_35.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_36.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_36.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_37.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_37.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_38.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_38.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_39.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_39.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_4.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_4.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_40.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_40.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_41.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_41.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_42.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_42.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_43.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_43.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_44.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_44.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_45.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_45.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_46.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_46.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_47.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_47.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_48.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_48.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_49.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_49.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_5.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_5.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_50.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_50.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_51.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_51.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_52.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_52.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_53.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_53.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_54.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_54.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_55.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_55.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_56.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_56.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_57.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_57.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_58.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_58.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_59.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_59.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_6.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_6.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_60.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_60.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_61.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_61.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_62.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_62.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_63.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_63.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_7.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_7.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_8.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_8.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_9.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.experts.expert_9.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.1.layer.2.mlp.router.classifier.weight": "model-00002-of-00004.safetensors", + "decoder.block.10.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.2.mlp.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.10.layer.2.mlp.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.layer_norm.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_0.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_0.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_1.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_1.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_10.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_10.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_11.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_11.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_12.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_12.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_13.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_13.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_14.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_14.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_15.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_15.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_16.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_16.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_17.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_17.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_18.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_18.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_19.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_19.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_2.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_2.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_20.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_20.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_21.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_21.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_22.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_22.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_23.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_23.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_24.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_24.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_25.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_25.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_26.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_26.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_27.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_27.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_28.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_28.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_29.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_29.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_3.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_3.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_30.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_30.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_31.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_31.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_32.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_32.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_33.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_33.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_34.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_34.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_35.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_35.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_36.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_36.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_37.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_37.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_38.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_38.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_39.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_39.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_4.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_4.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_40.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_40.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_41.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_41.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_42.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_42.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_43.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_43.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_44.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_44.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_45.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_45.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_46.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_46.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_47.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_47.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_48.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_48.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_49.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_49.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_5.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_5.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_50.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_50.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_51.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_51.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_52.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_52.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_53.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_53.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_54.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_54.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_55.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_55.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_56.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_56.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_57.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_57.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_58.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_58.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_59.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_59.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_6.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_6.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_60.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_60.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_61.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_61.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_62.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_62.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_63.wi.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_63.wo.weight": "model-00004-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_7.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_7.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_8.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_8.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_9.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.experts.expert_9.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.11.layer.2.mlp.router.classifier.weight": "model-00003-of-00004.safetensors", + "decoder.block.2.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.1.EncDecAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.1.EncDecAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.1.EncDecAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.1.EncDecAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.2.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.2.mlp.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.2.layer.2.mlp.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.1.EncDecAttention.k.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.1.EncDecAttention.o.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.1.EncDecAttention.q.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.1.EncDecAttention.v.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_0.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_0.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_1.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_1.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_10.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_10.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_11.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_11.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_12.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_12.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_13.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_13.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_14.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_14.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_15.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_15.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_16.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_16.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_17.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_17.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_18.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_18.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_19.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_19.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_2.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_2.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_20.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_20.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_21.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_21.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_22.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_22.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_23.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_23.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_24.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_24.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_25.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_25.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_26.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_26.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_27.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_27.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_28.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_28.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_29.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_29.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_3.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_3.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_30.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_30.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_31.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_31.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_32.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_32.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_33.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_33.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_34.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_34.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_35.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_35.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_36.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_36.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_37.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_37.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_38.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_38.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_39.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_39.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_4.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_4.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_40.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_40.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_41.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_41.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_42.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_42.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_43.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_43.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_44.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_44.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_45.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_45.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_46.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_46.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_47.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_47.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_48.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_48.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_49.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_49.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_5.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_5.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_50.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_50.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_51.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_51.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_52.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_52.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_53.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_53.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_54.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_54.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_55.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_55.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_56.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_56.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_57.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_57.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_58.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_58.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_59.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_59.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_6.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_6.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_60.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_60.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_61.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_61.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_62.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_62.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_63.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_63.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_7.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_7.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_8.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_8.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_9.wi.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.experts.expert_9.wo.weight": "model-00002-of-00004.safetensors", + "decoder.block.3.layer.2.mlp.router.classifier.weight": "model-00002-of-00004.safetensors", + "decoder.block.4.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.2.mlp.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.4.layer.2.mlp.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_0.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_0.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_1.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_1.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_10.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_10.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_11.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_11.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_12.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_12.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_13.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_13.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_14.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_14.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_15.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_15.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_16.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_16.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_17.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_17.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_18.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_18.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_19.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_19.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_2.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_2.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_20.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_20.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_21.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_21.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_22.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_22.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_23.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_23.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_24.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_24.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_25.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_25.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_26.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_26.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_27.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_27.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_28.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_28.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_29.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_29.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_3.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_3.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_30.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_30.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_31.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_31.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_32.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_32.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_33.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_33.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_34.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_34.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_35.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_35.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_36.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_36.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_37.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_37.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_38.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_38.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_39.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_39.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_4.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_4.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_40.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_40.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_41.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_41.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_42.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_42.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_43.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_43.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_44.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_44.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_45.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_45.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_46.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_46.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_47.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_47.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_48.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_48.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_49.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_49.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_5.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_5.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_50.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_50.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_51.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_51.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_52.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_52.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_53.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_53.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_54.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_54.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_55.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_55.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_56.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_56.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_57.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_57.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_58.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_58.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_59.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_59.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_6.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_6.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_60.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_60.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_61.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_61.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_62.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_62.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_63.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_63.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_7.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_7.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_8.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_8.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_9.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.experts.expert_9.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.5.layer.2.mlp.router.classifier.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.2.mlp.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.6.layer.2.mlp.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_0.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_0.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_1.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_1.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_10.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_10.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_11.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_11.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_12.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_12.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_13.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_13.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_14.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_14.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_15.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_15.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_16.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_16.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_17.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_17.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_18.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_18.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_19.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_19.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_2.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_2.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_20.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_20.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_21.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_21.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_22.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_22.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_23.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_23.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_24.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_24.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_25.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_25.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_26.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_26.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_27.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_27.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_28.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_28.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_29.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_29.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_3.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_3.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_30.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_30.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_31.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_31.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_32.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_32.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_33.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_33.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_34.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_34.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_35.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_35.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_36.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_36.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_37.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_37.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_38.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_38.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_39.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_39.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_4.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_4.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_40.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_40.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_41.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_41.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_42.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_42.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_43.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_43.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_44.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_44.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_45.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_45.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_46.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_46.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_47.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_47.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_48.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_48.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_49.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_49.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_5.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_5.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_50.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_50.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_51.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_51.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_52.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_52.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_53.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_53.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_54.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_54.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_55.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_55.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_56.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_56.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_57.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_57.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_58.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_58.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_59.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_59.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_6.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_6.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_60.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_60.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_61.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_61.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_62.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_62.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_63.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_63.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_7.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_7.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_8.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_8.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_9.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.experts.expert_9.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.7.layer.2.mlp.router.classifier.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.2.mlp.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.8.layer.2.mlp.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.0.SelfAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.0.SelfAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.0.SelfAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.0.SelfAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.0.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.1.EncDecAttention.k.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.1.EncDecAttention.o.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.1.EncDecAttention.q.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.1.EncDecAttention.v.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.1.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.layer_norm.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_0.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_0.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_1.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_1.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_10.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_10.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_11.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_11.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_12.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_12.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_13.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_13.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_14.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_14.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_15.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_15.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_16.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_16.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_17.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_17.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_18.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_18.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_19.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_19.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_2.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_2.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_20.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_20.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_21.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_21.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_22.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_22.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_23.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_23.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_24.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_24.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_25.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_25.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_26.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_26.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_27.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_27.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_28.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_28.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_29.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_29.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_3.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_3.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_30.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_30.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_31.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_31.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_32.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_32.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_33.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_33.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_34.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_34.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_35.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_35.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_36.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_36.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_37.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_37.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_38.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_38.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_39.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_39.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_4.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_4.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_40.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_40.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_41.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_41.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_42.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_42.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_43.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_43.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_44.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_44.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_45.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_45.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_46.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_46.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_47.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_47.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_48.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_48.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_49.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_49.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_5.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_5.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_50.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_50.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_51.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_51.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_52.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_52.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_53.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_53.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_54.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_54.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_55.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_55.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_56.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_56.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_57.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_57.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_58.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_58.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_59.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_59.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_6.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_6.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_60.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_60.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_61.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_61.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_62.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_62.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_63.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_63.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_7.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_7.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_8.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_8.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_9.wi.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.experts.expert_9.wo.weight": "model-00003-of-00004.safetensors", + "decoder.block.9.layer.2.mlp.router.classifier.weight": "model-00003-of-00004.safetensors", + "decoder.final_layer_norm.weight": "model-00004-of-00004.safetensors", + "encoder.block.0.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.1.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.1.mlp.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.0.layer.1.mlp.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_32.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_32.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_33.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_33.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_34.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_34.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_35.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_35.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_36.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_36.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_37.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_37.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_38.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_38.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_39.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_39.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_40.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_40.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_41.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_41.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_42.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_42.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_43.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_43.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_44.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_44.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_45.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_45.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_46.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_46.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_47.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_47.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_48.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_48.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_49.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_49.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_50.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_50.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_51.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_51.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_52.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_52.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_53.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_53.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_54.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_54.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_55.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_55.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_56.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_56.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_57.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_57.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_58.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_58.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_59.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_59.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_60.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_60.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_61.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_61.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_62.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_62.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_63.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_63.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.1.layer.1.mlp.router.classifier.weight": "model-00001-of-00004.safetensors", + "encoder.block.10.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "encoder.block.10.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "encoder.block.10.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "encoder.block.10.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "encoder.block.10.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.10.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.10.layer.1.mlp.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.10.layer.1.mlp.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_0.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_0.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_1.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_1.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_10.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_10.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_11.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_11.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_12.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_12.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_13.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_13.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_14.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_14.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_15.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_15.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_16.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_16.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_17.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_17.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_18.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_18.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_19.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_19.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_2.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_2.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_20.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_20.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_21.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_21.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_22.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_22.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_23.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_23.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_24.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_24.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_25.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_25.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_26.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_26.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_27.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_27.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_28.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_28.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_29.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_29.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_3.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_3.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_30.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_30.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_31.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_31.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_32.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_32.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_33.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_33.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_34.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_34.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_35.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_35.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_36.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_36.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_37.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_37.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_38.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_38.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_39.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_39.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_4.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_4.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_40.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_40.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_41.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_41.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_42.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_42.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_43.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_43.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_44.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_44.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_45.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_45.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_46.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_46.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_47.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_47.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_48.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_48.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_49.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_49.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_5.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_5.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_50.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_50.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_51.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_51.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_52.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_52.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_53.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_53.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_54.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_54.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_55.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_55.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_56.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_56.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_57.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_57.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_58.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_58.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_59.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_59.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_6.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_6.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_60.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_60.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_61.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_61.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_62.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_62.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_63.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_63.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_7.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_7.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_8.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_8.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_9.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.experts.expert_9.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.11.layer.1.mlp.router.classifier.weight": "model-00002-of-00004.safetensors", + "encoder.block.2.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.2.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.2.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.2.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.2.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.2.layer.1.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.2.layer.1.mlp.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.2.layer.1.mlp.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_32.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_32.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_33.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_33.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_34.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_34.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_35.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_35.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_36.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_36.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_37.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_37.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_38.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_38.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_39.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_39.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_40.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_40.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_41.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_41.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_42.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_42.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_43.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_43.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_44.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_44.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_45.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_45.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_46.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_46.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_47.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_47.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_48.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_48.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_49.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_49.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_50.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_50.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_51.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_51.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_52.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_52.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_53.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_53.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_54.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_54.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_55.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_55.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_56.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_56.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_57.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_57.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_58.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_58.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_59.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_59.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_60.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_60.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_61.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_61.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_62.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_62.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_63.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_63.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.3.layer.1.mlp.router.classifier.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.1.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.1.mlp.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.4.layer.1.mlp.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_32.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_32.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_33.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_33.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_34.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_34.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_35.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_35.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_36.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_36.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_37.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_37.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_38.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_38.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_39.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_39.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_40.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_40.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_41.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_41.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_42.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_42.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_43.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_43.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_44.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_44.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_45.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_45.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_46.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_46.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_47.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_47.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_48.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_48.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_49.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_49.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_50.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_50.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_51.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_51.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_52.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_52.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_53.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_53.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_54.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_54.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_55.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_55.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_56.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_56.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_57.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_57.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_58.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_58.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_59.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_59.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_60.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_60.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_61.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_61.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_62.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_62.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_63.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_63.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.5.layer.1.mlp.router.classifier.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.1.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.1.mlp.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.6.layer.1.mlp.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.0.SelfAttention.k.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.0.SelfAttention.o.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.0.SelfAttention.q.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.0.SelfAttention.v.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.0.layer_norm.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_32.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_32.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_33.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_33.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_34.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_34.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_35.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_35.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_36.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_36.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_37.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_37.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_38.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_38.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_39.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_39.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_40.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_40.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_41.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_41.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_42.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_42.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_43.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_43.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_44.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_44.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_45.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_45.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_46.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_46.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_47.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_47.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_48.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_48.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_49.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_49.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_50.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_50.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_51.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_51.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_52.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_52.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_53.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_53.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_54.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_54.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_55.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_55.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_56.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_56.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_57.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_57.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_58.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_58.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_59.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_59.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_60.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_60.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_61.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_61.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_62.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_62.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_63.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_63.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00004.safetensors", + "encoder.block.7.layer.1.mlp.router.classifier.weight": "model-00001-of-00004.safetensors", + "encoder.block.8.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "encoder.block.8.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "encoder.block.8.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "encoder.block.8.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "encoder.block.8.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.8.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.8.layer.1.mlp.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.8.layer.1.mlp.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.0.SelfAttention.k.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.0.SelfAttention.o.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.0.SelfAttention.q.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.0.SelfAttention.v.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.0.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.layer_norm.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_0.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_0.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_1.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_1.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_10.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_10.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_11.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_11.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_12.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_12.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_13.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_13.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_14.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_14.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_15.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_15.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_16.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_16.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_17.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_17.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_18.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_18.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_19.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_19.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_2.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_2.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_20.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_20.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_21.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_21.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_22.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_22.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_23.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_23.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_24.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_24.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_25.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_25.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_26.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_26.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_27.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_27.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_28.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_28.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_29.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_29.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_3.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_3.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_30.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_30.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_31.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_31.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_32.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_32.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_33.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_33.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_34.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_34.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_35.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_35.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_36.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_36.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_37.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_37.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_38.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_38.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_39.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_39.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_4.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_4.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_40.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_40.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_41.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_41.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_42.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_42.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_43.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_43.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_44.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_44.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_45.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_45.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_46.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_46.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_47.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_47.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_48.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_48.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_49.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_49.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_5.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_5.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_50.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_50.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_51.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_51.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_52.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_52.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_53.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_53.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_54.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_54.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_55.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_55.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_56.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_56.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_57.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_57.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_58.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_58.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_59.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_59.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_6.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_6.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_60.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_60.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_61.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_61.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_62.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_62.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_63.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_63.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_7.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_7.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_8.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_8.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_9.wi.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.experts.expert_9.wo.weight": "model-00002-of-00004.safetensors", + "encoder.block.9.layer.1.mlp.router.classifier.weight": "model-00002-of-00004.safetensors", + "encoder.final_layer_norm.weight": "model-00002-of-00004.safetensors", + "shared.weight": "model-00001-of-00004.safetensors" + } +}