# llama-3-typhoon-v1.5-8b-audio-preview / configuration_typhoonaudio.py
# potsawee's picture
# Upload TyphoonAudio
# 253101d verified
# raw
# history blame
# 1.08 kB
from transformers import PretrainedConfig
class TyphoonAudioConfig(PretrainedConfig):
    """Configuration container for the ``typhoonaudio`` model type.

    Every named constructor argument is stored unchanged as an attribute of
    the same name; any additional keyword arguments are handed to
    ``PretrainedConfig.__init__``.

    The descriptions below are inferred from parameter names and defaults
    only -- the code that consumes these values is not part of this file, so
    confirm the exact semantics against the modeling code.

    Args:
        whisper_path: Model identifier or local path of the Whisper
            checkpoint.
        llm_path: Model identifier or local path of the language model
            checkpoint.
        speech_qformer_token_num: Presumably the number of speech Q-Former
            tokens -- TODO confirm.
        speech_qformer_layer: Presumably the number of speech Q-Former
            layers -- TODO confirm.
        second_per_frame: Presumably a window length in seconds -- TODO
            confirm.
        second_stride: Presumably a window stride in seconds -- TODO confirm.
        lora: Flag stored as ``self.lora``; presumably toggles LoRA adapters.
        lora_alpha: Stored as ``self.lora_alpha``.
        lora_rank: Stored as ``self.lora_rank``.
        lora_dropout: Stored as ``self.lora_dropout``.
        dtype: Stored as ``self.dtype``; the default is the string
            ``"float16"``.
        **kwargs: Forwarded untouched to ``PretrainedConfig.__init__``.
    """

    model_type = "typhoonaudio"

    def __init__(
        self,
        whisper_path="biodatlab/whisper-th-large-v3-combined",  # or local path
        llm_path="scb10x/llama-3-typhoon-v1.5-8b-instruct",  # or local path
        speech_qformer_token_num=1,
        speech_qformer_layer=2,
        second_per_frame=0.333333,
        second_stride=0.333333,
        lora=True,
        lora_alpha=32,
        lora_rank=8,
        lora_dropout=0.0,
        dtype="float16",
        **kwargs
    ):
        # Model-specific settings, listed in the order they are assigned.
        own_settings = {
            "whisper_path": whisper_path,
            "llm_path": llm_path,
            "speech_qformer_token_num": speech_qformer_token_num,
            "speech_qformer_layer": speech_qformer_layer,
            "second_per_frame": second_per_frame,
            "second_stride": second_stride,
            "lora": lora,
            "lora_alpha": lora_alpha,
            "lora_rank": lora_rank,
            "lora_dropout": lora_dropout,
            # NOTE(review): some transformers releases also define a `dtype`
            # attribute on the base config -- confirm the base initializer
            # below does not overwrite this value in the targeted version.
            "dtype": dtype,
        }
        for attr_name, attr_value in own_settings.items():
            setattr(self, attr_name, attr_value)

        # The base initializer runs last and receives only the leftover
        # keyword arguments.
        super().__init__(**kwargs)