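# OpenSLU reproduction config: AGIF (Adaptive Graph-Interactive Framework) with a
# self-attentive LSTM encoder, trained on the MixSNIPS multi-intent dataset.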
device: "Tesla P100-PCIE-16GB"

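# Run-level settings: multi-intent mode, train/test switches, device, random seed,
# epoch count and batch size. ignore_index marks padded positions excluded from the loss.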
base:
  name: "OpenSLUv1"
  multi_intent: true
  train: true
  test: true
  device: cuda
  seed: 42
  epoch_num: 50
  batch_size: 64
  ignore_index: -100

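# Checkpoint handling: load_dir: null starts training from scratch; weights are written to save_dir.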
model_manager:
  load_dir: null
  save_dir: save/agif-mix-snips

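# Evaluate once per epoch; best_key selects the metric used to pick the best checkpoint.
# EMA is exact-match accuracy (intents and all slots of an utterance correct).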
evaluator:
  best_key: EMA
  eval_by_epoch: true
  # eval_step: 1800
  metric:
    - intent_acc
    - intent_f1
    - slot_f1
    - EMA

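# Accelerated / distributed training (e.g. via the accelerate library) is disabled.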
accelerator:
  use_accelerator: false

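# Multi-intent SNIPS benchmark (MixSNIPS).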
dataset:
  dataset_name: mix-snips

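# Word-level tokenizer: right padding, no special tokens, maximum sequence length 512.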
tokenizer:
  _tokenizer_name_: word_tokenizer
  _padding_side_: right
  _align_mode_: fast
  add_special_tokens: false
  max_length: 512

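# _model_target_ names the class or function to instantiate; _model_partial_: true defers
# construction so the remaining arguments (here the model parameters) can be supplied later.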
optimizer:
  _model_target_: torch.optim.Adam
  _model_partial_: true
  lr: 0.001
  weight_decay: 1e-6

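# Linear learning-rate schedule from transformers.get_scheduler, with no warmup steps.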
scheduler:
  _model_target_: transformers.get_scheduler
  _model_partial_: true
  name: "linear"
  num_warmup_steps: 0

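# Model definition. "{...}" placeholders are resolved against other config fields
# (and dataset-derived values such as base.intent_label_num) when the config is loaded.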
model:
  _model_target_: model.OpenSLUModel

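  # Self-attentive LSTM encoder: embedding -> BiLSTM -> self-attention.
  # output_dim is the sum of the LSTM (256) and attention (128) output sizes, i.e. 384.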
  encoder:
    _model_target_: model.encoder.AutoEncoder
    encoder_name: self-attention-lstm

    embedding:
      embedding_dim: 128
      dropout_rate: 0.4

    lstm:
      layer_num: 1
      bidirectional: true
      output_dim: 256
      dropout_rate: 0.4

    attention:
      hidden_dim: 1024
      output_dim: 128
      dropout_rate: 0.4

    unflat_attention:
      dropout_rate: 0.4
    output_dim: "{model.encoder.lstm.output_dim} + {model.encoder.attention.output_dim}"
    return_with_input: true
    return_sentence_level_hidden: true

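  # AGIF decoder: predicted intents and slot hidden states interact through an
  # adaptive intent-slot graph attention layer before slot labels are emitted.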
  decoder:
    _model_target_: model.decoder.AGIFDecoder
#    teacher_forcing: true
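    # Graph-attention interaction over intent embeddings and slot hidden states.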
    interaction:
      _model_target_: model.decoder.interaction.AGIFInteraction
      intent_embedding_dim: 128
      input_dim: "{model.encoder.output_dim}"
      hidden_dim: 128
      output_dim: "{model.decoder.interaction.intent_embedding_dim}"
      dropout_rate: 0.4
      alpha: 0.2
      num_heads: 4
      num_layers: 2
      row_normalized: true

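    # Multi-label intent head: MLP scored with BCEWithLogitsLoss; intents whose sigmoid
    # score exceeds multi_threshold are predicted. weight is this head's share of the joint loss.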
    intent_classifier:
      _model_target_: model.decoder.classifier.MLPClassifier
      mode: "intent"
      mlp:
        - _model_target_: torch.nn.Linear
          in_features: "{model.encoder.output_dim}"
          out_features: 256
        - _model_target_: torch.nn.LeakyReLU
          negative_slope: 0.2
        - _model_target_: torch.nn.Linear
          in_features: 256
          out_features: "{base.intent_label_num}"
      dropout_rate: 0.4
      loss_fn:
        _model_target_: torch.nn.BCEWithLogitsLoss
      use_multi: "{base.multi_intent}"
      multi_threshold: 0.5
      return_sentence_level: true
      ignore_index: -100
      weight: 0.3

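    # Autoregressive LSTM slot decoder guided by the interaction output;
    # force_ratio is the teacher-forcing probability, weight the slot share of the joint loss.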
    slot_classifier:
      _model_target_: model.decoder.classifier.AutoregressiveLSTMClassifier
      mode: "slot"
      input_dim: "{model.encoder.output_dim}"
      layer_num: 1
      bidirectional: false
      force_ratio: 0.9
      hidden_dim: "{model.decoder.interaction.intent_embedding_dim}"
      embedding_dim: 128
      ignore_index: -100
      dropout_rate: 0.4
      use_multi: false
      multi_threshold: 0.5
      return_sentence_level: false
      weight: 0.7