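# Model configuration (the keys appear to follow litgpt's Config schema) for a
# speech-language model built on Qwen2-0.5B, extending the text vocabulary with
# audio tokens and attaching a Whisper-feature adapter.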
add_qkv_bias: true
asr_adapter: llamamlp
attn_dropout: 0.0
bias: false
block_size: 2048
force_align: false
gelu_approximate: none
head_size: 64
hf_config:
  name: Qwen2-0.5B
  org: Qwen
intermediate_size: 4864
lm_head_bias: false
mlp_class_name: LLaMAMLP
n_embd: 896
n_expert: 0
n_expert_per_token: 0
n_head: 14
n_layer: 24
n_query_groups: 2
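# Attention geometry: n_embd (896) = n_head (14) * head_size (64);
# n_query_groups: 2 gives grouped-query attention with 2 key/value heads
# shared across the 14 query heads.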
name: Qwen2-0.5B
norm_class_name: RMSNorm
norm_eps: 1.0e-06
padded_vocab_size: 181120
padding_multiple: 512
parallel_residual: false
pos_type: rope
post_adapter: false
post_adapter_layers: 6
prompt_vocab_size: null
rope_base: 1000000
rope_condense_ratio: 1
rotary_percentage: 1
scale_embeddings: false
shared_attention_norm: false
tie_word_embeddings: true
use_pretrain_phoneme_emb: false
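# Vocabulary layout (relations implied by the values themselves):
#   padded_vocab_size (181120) = text_vocab_size (152000) + cat_audio_vocab_size (29120)
#   cat_audio_vocab_size (29120) = 7 * audio_vocab_size (4160), i.e. seven audio
#   codebook streams flattened into one token space (inferred from the 7x factor).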
vocab_size: 50254
text_vocab_size: 152000
cat_audio_vocab_size: 29120
audio_vocab_size: 4160
whisper_adapter_dim: 768
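# whisper_adapter_dim (768) presumably matches the hidden size of the Whisper
# encoder features fed to the adapter (768 is the d_model of whisper-small).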