; Model configuration for the [llama] section.
; One key = value pair per line: INI parsers are line-oriented, so every
; setting must sit on its own line below the section header to be read.
; NOTE(review): key names look like a FasterTransformer-style model config;
; confirm which loader consumes this file.

[llama]
model_name = XVERSE

; Network dimensions.
; presumably attention head count and per-head width; verify against the checkpoint
head_num = 40
size_per_head = 128
; presumably the feed-forward intermediate width; verify against the checkpoint
inter_size = 13824
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-06

; Vocabulary and special token ids.
; presumably must match the tokenizer shipped with the model; TODO confirm
vocab_size = 100278
start_id = 2
end_id = 3

; Runtime and weight layout.
; presumably the tensor-parallel degree the weights were exported for; TODO confirm
tensor_para_size = 1
weight_data_type = fp16