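# ############################################################################
# HyperPyYAML hyperparameters for an audio-language model assembled from:
#   Whisper encoder -> average pooling -> TLTR -> audio projection -> LLaMA-3 (LoRA)
#
# NOTE(review): the original header banner was lost; only stray "|" table
# delimiters remained on these lines. Restore the original text if available.
# ############################################################################
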
# LLaMA-3 checkpoint location.
# llama_hub is the identifier passed as `source` to the llama3 object;
# llama3_folder is passed as its `save_path`.
llama_hub: meta-llama/Meta-Llama-3-8B-Instruct
llama3_folder: llama3_checkpoint

# Text generation settings; each value is forwarded by !ref to the
# same-named argument of the llama3 object.
num_beams: 3
max_new_tokens: 400
top_k: 500
top_p: 0.95
temperature: 0.1
repetition_penalty: 1.1

# LoRA settings; each value is forwarded by !ref to the same-named
# argument of the llama3 object (which is created with with_peft: True).
lora_dropout: 0.05
lora_alpha: 16
r: 8
bias: "none"
task_type: "CAUSAL_LM"
lora_target_modules: ["q_proj", "v_proj"]

# Whisper encoder settings.
# whisper_hub / whisper_folder / freeze_whisper feed the `source`, `save_path`
# and `freeze` arguments of the whisper object.
# whisper_output_dim is reused as the TLTR `rep_dim` and the audio projection
# `input_size`, so it must match the feature size the encoder emits.
whisper_hub: openai/whisper-large
whisper_folder: whisper_checkpoint
freeze_whisper: True
whisper_output_dim: 1280

# Kernel size of the 1-D average pooling layer (avg_pool.kernel_size).
pooling_kernel: 20

# tltr_layers is passed as `n_layer` to the TLTR model.
# llama_hidden_size is passed as `hidden_size` to the audio projection.
# NOTE(review): presumably these must match the Whisper encoder depth and the
# LLaMA embedding width respectively; confirm against the model classes.
tltr_layers: 32
llama_hidden_size: 4096

# Mask helpers. `!name:` stores a reference to the function itself (it is not
# called at load time), so callers invoke these with their own arguments.
audio_padding_mask: !name:speechbrain.dataio.dataio.length_to_mask
text_padding_mask: !name:speechbrain.lobes.models.transformer.Transformer.get_key_padding_mask

# Whisper model wrapper, built with encoder_only and output_all_hiddens enabled.
# source, freeze and save_path come from the whisper_* settings above.
whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
    source: !ref <whisper_hub>
    freeze: !ref <freeze_whisper>
    save_path: !ref <whisper_folder>
    encoder_only: True
    output_all_hiddens: True

# 1-D average pooling layer; its kernel size is taken from pooling_kernel.
avg_pool: !new:speechbrain.nnet.pooling.Pooling1d
    pool_type: "avg"
    kernel_size: !ref <pooling_kernel>

# TLTR model (AT_MODEL), created with freeze: True.
# n_layer comes from tltr_layers and rep_dim from whisper_output_dim.
tltr: !new:speechbrain.lobes.models.TLTR.AT_MODEL
    n_layer: !ref <tltr_layers>
    rep_dim: !ref <whisper_output_dim>
    freeze: True

# Audio projection from whisper_output_dim (input_size) to
# llama_hidden_size (hidden_size).
audio_proj: !new:speechbrain.lobes.models.TLTR.AudioProjection
    input_size: !ref <whisper_output_dim>
    hidden_size: !ref <llama_hidden_size>

# LLaMA-3 language model, instantiated through SpeechBrain's LLAMA2 wrapper class.
# Every generation and LoRA argument is forwarded from the same-named top-level
# setting. freeze: True and with_peft: True are both passed.
# NOTE(review): how the wrapper combines `freeze` with the LoRA adapters is
# defined in the LLAMA2 class and is not visible in this file; confirm there.
llama3: !new:speechbrain.lobes.models.huggingface_transformers.llama2.LLAMA2
    source: !ref <llama_hub>
    freeze: True
    save_path: !ref <llama3_folder>
    max_new_tokens: !ref <max_new_tokens>
    num_beams: !ref <num_beams>
    top_k: !ref <top_k>
    top_p: !ref <top_p>
    temperature: !ref <temperature>
    repetition_penalty: !ref <repetition_penalty>
    with_peft: True
    lora_alpha: !ref <lora_alpha>
    lora_dropout: !ref <lora_dropout>
    r: !ref <r>
    bias: !ref <bias>
    task_type: !ref <task_type>
    lora_target_modules: !ref <lora_target_modules>

# Name -> object mapping exposing the TLTR, audio projection and LLaMA-3 objects.
# NOTE(review): whisper and avg_pool are not listed here; presumably the
# consuming code reads them directly from the hyperparameters. Verify.
modules:
    tltr: !ref <tltr>
    audio_proj: !ref <audio_proj>
    llama3: !ref <llama3>

# ModuleList grouping the TLTR and audio projection objects into a single
# entry, which the pretrainer references as `model`.
# The nested sequence is the positional-argument form: ModuleList receives one
# argument, the list [tltr, audio_proj].
model: !new:torch.nn.ModuleList
    - [!ref <tltr>, !ref <audio_proj>]

# Pretrainer configured with two loadables: the LLaMA-3 wrapper and the
# grouped TLTR + audio projection ModuleList.
# NOTE(review): the checkpoint sources/paths for these loadables are not set in
# this file; presumably they are supplied by the caller. Confirm.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        llama3: !ref <llama3>
        model: !ref <model>