|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Random seed for the run. The literal 1986 is repeated in the torch.manual_seed
# call below and inside the results/better_tokenizer/1986 paths, so all of them
# have to be edited together if the seed changes.
seed: 1986 |
|
# HyperPyYAML `!apply:` tag: calls torch.manual_seed(1986) when this file is loaded.
__set_seed: !apply:torch.manual_seed [1986] |
|
|
|
# Output locations, written as literal paths.
# prepared_folder is the directory part of the csv_train / csv_valid / csv_test paths.
prepared_folder: results/prepared |
|
# Root directory of this run; save_folder, train_log and log_folder all live under it.
output_folder: results/better_tokenizer/1986 |
|
# Same path as checkpointer.checkpoints_dir and the parent of pretrainer.collect_in.
save_folder: results/better_tokenizer/1986/save |
|
# NOTE(review): no other key in this section uses train_log; presumably read by the training script.
train_log: results/better_tokenizer/1986/train_log.txt |
|
# Same path as train_logger.save_dir.
log_folder: results/better_tokenizer/1986/log |
|
|
|
|
|
|
|
|
|
# Dataset locations and data-preparation settings.
# data_folder and data_folder_rirs point at the same directory, which is also the
# value of env_corrupt.openrir_folder.
data_folder: /slurp/audio |
|
data_folder_rirs: /slurp/audio |
|
# Names of the training splits.
# NOTE(review): presumably consumed by the data-preparation step; confirm.
train_splits: [train_synthetic, train_real] |
|
# Manifest CSVs under results/prepared. The train and devel file names carry a
# "sample=0.2" suffix while the test file name does not.
# NOTE(review): presumably train/devel are 20% subsamples; confirm against the preparation script.
csv_train: results/prepared/train-type=direct-sample=0.2.csv |
|
csv_valid: results/prepared/devel-type=direct-sample=0.2.csv |
|
csv_test: results/prepared/test-type=direct.csv |
|
# URL of the tokenizer model; the same URL is given to pretrainer.paths.tokenizer.
tokenizer_file: https://www.dropbox.com/s/tmwq12r5vgcsif9/58_unigram.model?dl=1 |
|
# NOTE(review): presumably false means data preparation is executed; confirm in the training script.
skip_prep: false |
|
|
|
|
|
# Training schedule.
# Same value as epoch_counter.limit.
number_of_epochs: 60 |
|
# Same value as dataloader_opts.batch_size.
batch_size: 12 |
|
# Same value as opt_class.lr and lr_annealing.initial_value.
lr: 0.0003 |
|
|
|
# NOTE(review): not referenced by another key in this section; presumably read by the training script.
sorting: random |
|
# NOTE(review): presumably the interval between time-based checkpoints; it is not passed
# to the checkpointer entry in this section, so confirm where it is consumed.
ckpt_interval_minutes: 15 |
|
|
|
|
|
# Feature and model dimensions. Each value is repeated as a literal in the module
# entries named below, so the copies must be kept in sync by hand.
# Same value as augmentation.sample_rate.
sample_rate: 16000 |
|
# Same value as output_emb.embedding_dim and dec.input_size.
emb_size: 128 |
|
# Same value as dec.hidden_size and seq_lin.input_size.
dec_neurons: 512 |
|
# Same value as output_emb.num_embeddings and seq_lin.n_neurons; the tokenizer file
# is named 58_unigram.model.
output_neurons: 58 |
|
# Same value as the last entry of slu_enc.input_shape and the lstm input_size.
ASR_encoder_dim: 512 |
|
# Same value as the lstm hidden_size, the linear n_neurons and dec.enc_dim.
encoder_dim: 256 |
|
|
|
|
|
# Decoding settings. Every value is repeated verbatim in the beam_searcher entry
# (slu_beam_size appears there as beam_size).
# bos_index and eos_index are both 0, i.e. the same token id is used for both.
bos_index: 0 |
|
eos_index: 0 |
|
min_decode_ratio: 0.0 |
|
max_decode_ratio: 10.0 |
|
slu_beam_size: 80 |
|
eos_threshold: 1.5 |
|
temperature: 1.25 |
|
|
|
# Data loader options.
# NOTE(review): batch_size and shuffle are presumably entries of dataloader_opts; confirm nesting.
# batch_size repeats the standalone batch_size value (12).
dataloader_opts: |
|
batch_size: 12 |
|
shuffle: true |
|
|
|
# Epoch counter object (HyperPyYAML `!new:` instantiates the named class on load).
# Anchored as &id009 and referenced by checkpointer.recoverables.counter.
# limit repeats number_of_epochs (60).
epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter |
|
|
|
|
|
limit: 60 |
|
|
|
|
|
# ASR model obtained by calling EncoderDecoderASR.from_hparams when the file is
# loaded (`!apply:` invokes the function with the keys below as arguments).
# NOTE(review): source looks like a hub model identifier; confirm where it is fetched from.
asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams |
|
source: speechbrain/asr-crdnn-rnnlm-librispeech |
|
# The device is hard-coded to cuda:0.
# NOTE(review): presumably needs editing to run on a machine without that device.
run_opts: {device: cuda:0} |
|
|
|
# Encoder: a Sequential container declaring an `lstm` entry followed by a `linear`
# entry. Anchored as &id001 and referenced from modules.slu_enc and the model list.
# NOTE(review): presumably applied to the encoder output of asr_model; confirm in the training script.
slu_enc: &id001 !new:speechbrain.nnet.containers.Sequential |
|
# Last dimension 512 equals ASR_encoder_dim; the two leading dimensions are left as null.
input_shape: [null, null, 512] |
|
lstm: !new:speechbrain.nnet.RNN.LSTM |
|
input_size: 512 |
|
bidirectional: true |
|
hidden_size: 256 |
|
num_layers: 2 |
|
# input_size 512 is twice the lstm hidden_size of 256.
# NOTE(review): presumably because the bidirectional LSTM concatenates both directions; confirm.
linear: !new:speechbrain.nnet.linear.Linear |
|
input_size: 512 |
|
# Same value as encoder_dim and dec.enc_dim.
n_neurons: 256 |
|
|
|
# Token embedding table. Anchored as &id002 and referenced from modules.output_emb,
# the model list and beam_searcher.embedding.
# num_embeddings repeats output_neurons (58); embedding_dim repeats emb_size (128).
output_emb: &id002 !new:speechbrain.nnet.embedding.Embedding |
|
num_embeddings: 58 |
|
embedding_dim: 128 |
|
|
|
# Attentional RNN decoder. Anchored as &id003 and referenced from modules.dec,
# the model list and beam_searcher.decoder.
dec: &id003 !new:speechbrain.nnet.RNN.AttentionalRNNDecoder |
|
# Same value as encoder_dim and the slu_enc linear n_neurons (256).
enc_dim: 256 |
|
# Same value as emb_size and output_emb.embedding_dim (128).
input_size: 128 |
|
rnn_type: gru |
|
attn_type: keyvalue |
|
# Same value as dec_neurons and seq_lin.input_size (512).
hidden_size: 512 |
|
attn_dim: 512 |
|
num_layers: 3 |
|
scaling: 1.0 |
|
# Dropout is set to 0.0 for the decoder.
dropout: 0.0 |
|
|
|
# Linear layer from the decoder width to the token vocabulary. Anchored as &id004 and
# referenced from modules.seq_lin, the model list and beam_searcher.linear.
# input_size repeats dec_neurons (512); n_neurons repeats output_neurons (58).
seq_lin: &id004 !new:speechbrain.nnet.linear.Linear |
|
input_size: 512 |
|
n_neurons: 58 |
|
|
|
# Environmental corruption object. Anchored as &id005 and referenced from
# modules.env_corrupt; it is not part of the model list.
env_corrupt: &id005 !new:speechbrain.lobes.augment.EnvCorrupt |
|
|
|
# Same directory as data_folder and data_folder_rirs.
openrir_folder: /slurp/audio |
|
# babble_prob and reverb_prob are 0.0 and noise_prob is 1.0.
# NOTE(review): presumably these are per-batch probabilities, so only additive noise is
# applied and it is applied every time; confirm against the EnvCorrupt documentation.
babble_prob: 0.0 |
|
reverb_prob: 0.0 |
|
noise_prob: 1.0 |
|
# NOTE(review): presumably the lower and upper bounds of the noise SNR in dB; confirm.
noise_snr_low: 0 |
|
noise_snr_high: 15 |
|
|
|
# Name-to-object mapping built from the anchors defined earlier in this file.
# NOTE(review): presumably handed to the training class as its module dictionary; confirm.
modules: |
|
slu_enc: *id001 |
|
output_emb: *id002 |
|
dec: *id003 |
|
seq_lin: *id004 |
|
env_corrupt: *id005 |
|
# torch.nn.ModuleList built from one positional argument: the list of the four
# objects id001-id004. env_corrupt (id005) is not included. Anchored as &id007 and
# referenced by checkpointer.recoverables.model.
model: &id007 !new:torch.nn.ModuleList |
|
- [*id001, *id002, *id003, *id004] |
|
# SentencePieceProcessor created without arguments here; anchored as &id006 and
# listed under pretrainer.loadables.
tokenizer: &id006 !new:sentencepiece.SentencePieceProcessor |
|
|
|
# Pretrainer entry that pairs the tokenizer object with the location of its model file.
# NOTE(review): presumably the training script has to trigger the actual fetch/load; confirm.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer |
|
# Directory under save_folder. The directory name is spelled SLURM_tokenizer; it is a
# runtime path, so renaming it changes where files are collected.
collect_in: results/better_tokenizer/1986/save/SLURM_tokenizer |
|
# The key `tokenizer` appears under both loadables and paths, linking the object
# (*id006) to its source URL.
loadables: |
|
tokenizer: *id006 |
|
paths: |
|
# Same URL as tokenizer_file.
tokenizer: https://www.dropbox.com/s/tmwq12r5vgcsif9/58_unigram.model?dl=1 |
|
|
|
# Beam-search decoder wired to the embedding (id002), decoder (id003) and output
# linear layer (id004) objects defined earlier in this file.
beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher |
|
embedding: *id002 |
|
decoder: *id003 |
|
linear: *id004 |
|
# The next seven values repeat the standalone decoding settings
# (beam_size repeats slu_beam_size); bos_index and eos_index are both 0.
bos_index: 0 |
|
eos_index: 0 |
|
min_decode_ratio: 0.0 |
|
max_decode_ratio: 10.0 |
|
beam_size: 80 |
|
eos_threshold: 1.5 |
|
temperature: 1.25 |
|
# NOTE(review): presumably max_attn_shift has no effect while using_max_attn_shift is false; confirm.
using_max_attn_shift: false |
|
max_attn_shift: 30 |
|
# Coverage penalty is set to zero.
coverage_penalty: 0. |
|
|
|
# Optimizer factory: HyperPyYAML `!name:` yields a callable for torch.optim.Adam with
# lr pre-filled instead of building the optimizer here.
# lr repeats the standalone lr value (0.0003).
opt_class: !name:torch.optim.Adam |
|
lr: 0.0003 |
|
|
|
# Learning-rate scheduler object. Anchored as &id008 and referenced by
# checkpointer.recoverables.scheduler.
lr_annealing: &id008 !new:speechbrain.nnet.schedulers.NewBobScheduler |
|
# Same value as lr and opt_class.lr.
initial_value: 0.0003 |
|
# NOTE(review): presumably the rate is multiplied by annealing_factor (0.8) when the
# relative improvement falls below improvement_threshold, with `patient` extra epochs
# tolerated first; confirm against the NewBobScheduler documentation.
improvement_threshold: 0.0025 |
|
annealing_factor: 0.8 |
|
patient: 0 |
|
|
|
# Checkpointer object listing the three objects registered as recoverable:
# the model list (id007), the LR scheduler (id008) and the epoch counter (id009).
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer |
|
# Same path as save_folder.
checkpoints_dir: results/better_tokenizer/1986/save |
|
recoverables: |
|
model: *id007 |
|
scheduler: *id008 |
|
counter: *id009 |
|
# Time-domain augmentation object. It has no anchor and is not listed under modules.
# sample_rate repeats the standalone sample_rate value (16000).
augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment |
|
sample_rate: 16000 |
|
# NOTE(review): presumably speed-perturbation factors in percent (95%, 100%, 105%); confirm.
speeds: [95, 100, 105] |
|
|
|
# Softmax activation object created with apply_log set to true.
# NOTE(review): given the key name, presumably this makes it output log-probabilities; confirm.
log_softmax: !new:speechbrain.nnet.activations.Softmax |
|
apply_log: true |
|
|
|
# Loss factory: `!name:` yields a callable for nll_loss with label_smoothing
# pre-filled to 0.1; the remaining arguments are supplied by the caller.
seq_cost: !name:speechbrain.nnet.losses.nll_loss |
|
label_smoothing: 0.1 |
|
|
|
|
|
|
|
# Training logger object of type TensorboardLogger.
# save_dir is the same path as log_folder; the train_log text-file path is not used here.
train_logger: !new:speechbrain.utils.train_logger.TensorboardLogger |
|
save_dir: results/better_tokenizer/1986/log |
|
|
|
# Metric factories: `!name:` yields callables that build ErrorRateStats objects on demand.
# error_rate_computer uses the class with no extra arguments.
error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats |
|
|
|
# cer_computer uses the same class with split_tokens set to true.
# NOTE(review): given the key name, presumably this yields a character-level error rate; confirm.
cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats |
|
split_tokens: true |
|
|