{ "metadata": { "ParamSize": 63, "ParamBytes": 7862016.0, "BitsPerParam": 4.504111095833906 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 7862016, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 4096, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 4096, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 98304, "byteOffset": 786432 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 884736 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 384, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 885504 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18432, "byteOffset": 1032960 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 1536, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1051392 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1536, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1346304 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 1383168 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 2048, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 1383936 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 2048, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1777152 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 384, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 196608, "byteOffset": 1826304 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 384, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 2022912 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 2047488 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 384, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 2048256 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18432, "byteOffset": 2195712 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 1536, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2214144 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1536, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2509056 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 2545920 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 2048, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 2546688 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 2048, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2939904 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 384, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 196608, "byteOffset": 2989056 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 384, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 3185664 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 3210240 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 384, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 3211008 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18432, "byteOffset": 3358464 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 1536, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 3376896 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1536, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 3671808 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 3708672 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 2048, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 3709440 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 2048, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 4102656 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 384, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 196608, "byteOffset": 4151808 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 384, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 4348416 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 4372992 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 384, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4373760 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18432, "byteOffset": 4521216 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 1536, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 4539648 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1536, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 4834560 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 4871424 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 2048, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 4872192 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 2048, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 5265408 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 384, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 196608, "byteOffset": 5314560 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 384, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 5511168 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 5535744 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 384, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 5536512 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18432, "byteOffset": 5683968 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 1536, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5702400 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1536, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 5997312 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 6034176 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 2048, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 6034944 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 2048, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 6428160 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 384, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 196608, "byteOffset": 6477312 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 384, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 6673920 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 6698496 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 384, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 6699264 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18432, "byteOffset": 6846720 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 1536, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6865152 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1536, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 7160064 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 7196928 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 2048, 48 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 7197696 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 2048, 12 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 7590912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 384, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 196608, "byteOffset": 7640064 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 384, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 7836672 }, { "name": "model.norm.weight", "shape": [ 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 7861248 } ], "md5sum": "c4a7c141bffb4bf77b662a6b32138dc1" } ] }