from transformers import PretrainedConfig


class TyphoonAudioConfig(PretrainedConfig):
    """Configuration for TyphoonAudio: stores the speech encoder (Whisper) and
    LLM checkpoint paths, the speech Q-Former settings, and the LoRA
    hyperparameters used when adapting the LLM."""

    model_type = "typhoonaudio"

    def __init__(
        self,
        whisper_path="biodatlab/whisper-th-large-v3-combined",  # or local path
        llm_path="scb10x/llama-3-typhoon-v1.5-8b-instruct",  # or local path
        # Speech Q-Former bridging the audio encoder to the LLM
        speech_qformer_token_num=1,
        speech_qformer_layer=2,
        # Audio framing: seconds covered per frame and stride between frames
        second_per_frame=0.333333,
        second_stride=0.333333,
        # LoRA adaptation of the LLM
        lora=True,
        lora_alpha=32,
        lora_rank=8,
        lora_dropout=0.0,
        dtype="float16",
        **kwargs,
    ):
        self.whisper_path = whisper_path
        self.llm_path = llm_path
        self.speech_qformer_token_num = speech_qformer_token_num
        self.speech_qformer_layer = speech_qformer_layer
        self.second_per_frame = second_per_frame
        self.second_stride = second_stride
        self.lora = lora
        self.lora_alpha = lora_alpha
        self.lora_rank = lora_rank
        self.lora_dropout = lora_dropout
        self.dtype = dtype
        super().__init__(**kwargs)
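

# --- Usage sketch (illustrative, not part of the original module) ---
# Shows how this config round-trips through the standard PretrainedConfig
# save/load API. The overridden values and the "./typhoonaudio_config"
# directory name are arbitrary examples, not defaults from the source.
if __name__ == "__main__":
    config = TyphoonAudioConfig(lora_rank=16, dtype="bfloat16")
    config.save_pretrained("./typhoonaudio_config")  # writes config.json
    reloaded = TyphoonAudioConfig.from_pretrained("./typhoonaudio_config")
    print(reloaded.whisper_path, reloaded.lora_rank, reloaded.dtype)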