{ "producer": { "name": "modelopt", "version": "0.13.0" }, "architecture": "LlamaForCausalLM", "dtype": "float16", "logits_dtype": "float16", "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_size": 4096, "norm_epsilon": 1e-05, "vocab_size": 32000, "max_position_embeddings": 4096, "hidden_act": "silu", "use_parallel_embedding": true, "embedding_sharding_dim": 0, "quantization": { "quant_algo": "W8A8_SQ_PER_CHANNEL", "kv_cache_quant_algo": "FP8" }, "mapping": { "world_size": 1, "tp_size": 1, "pp_size": 1 }, "head_size": 128, "intermediate_size": 11008, "position_embedding_type": "rope_gpt_neox", "share_embedding_table": false, "residual_mlp": false, "bias": false, "rotary_pct": 1.0, "rank": 0, "decoder": "llama", "rmsnorm": true, "lm_head_bias": false, "rotary_base": 10000.0 }