{
  "_name_or_path": "Llama-3.1-8B-Omni",
  "architectures": [
    "OmniSpeech2SLlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "ctc_decoder_config": "(2,4096,32,11008)",
  "ctc_loss_weight": 1.0,
  "ctc_upsample_factor": 25,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "freeze_speech_projector": false,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "omni_speech2s_llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 8.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "speech_encoder": "models/speech_encoder/large-v3.pt",
  "speech_encoder_ds_rate": 5,
  "speech_encoder_hidden_size": 1280,
  "speech_encoder_type": "whisper",
  "speech_generator_type": "ctc",
  "speech_normalize": false,
  "speech_projector_lr": null,
  "speech_projector_type": "linear",
  "tie_word_embeddings": false,
  "tokenizer_model_max_length": 2048,
  "tokenizer_padding_side": "right",
  "torch_dtype": "float16",
  "transformers_version": "4.43.4",
  "tune_speech_projector": false,
  "unit_vocab_size": 1000,
  "use_cache": true,
  "vocab_size": 128256
}