Yushi Ueda committed on
Commit
2acac75
1 Parent(s): f598588

Update model

Browse files
README.md ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - diarization
6
+ datasets:
7
+ - mini_librispeech
8
+ license: cc-by-4.0
9
+ ---
10
+
11
+ ## ESPnet2 DIAR model
12
+
13
+ ### `YushiUeda/test`
14
+
15
+ This model was trained by Yushi Ueda using the mini_librispeech recipe in [espnet](https://github.com/espnet/espnet/).
16
+
17
+ ### Demo: How to use in ESPnet2
18
+
19
+ ```bash
20
+ cd espnet
21
+ git checkout 4dfa2be4331d3d68f124aa5fd81f63217a7278a4
22
+ pip install -e .
23
+ cd egs2/mini_librispeech/diar1
24
+ ./run.sh --skip_data_prep false --skip_train true --download_model YushiUeda/test
25
+ ```
26
+
27
+ <!-- Generated by scripts/utils/show_diar_result.sh -->
28
+ # RESULTS
29
+ ## Environments
30
+ - date: `Wed Aug 25 23:29:07 EDT 2021`
31
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
32
+ - espnet version: `espnet 0.10.2a1`
33
+ - pytorch version: `pytorch 1.9.0+cu102`
34
+ - Git hash: `19bcd34f9395e01e54a97c4db5ecbcedb429dd92`
35
+ - Commit date: `Tue Aug 24 19:50:44 2021 -0400`
36
+
37
+ ## `diar_train_diar_raw_max_epoch20`
38
+ ### DER
39
+ `dev_clean_2_ns2_beta2_500`
40
+
41
+ |threshold_median_collar|DER|
42
+ |---|---|
43
+ |result_th0.3_med1_collar0.0|32.42|
44
+ |result_th0.3_med11_collar0.0|32.03|
45
+ |result_th0.4_med1_collar0.0|30.96|
46
+ |result_th0.4_med11_collar0.0|30.26|
47
+ |result_th0.5_med1_collar0.0|30.35|
48
+ |result_th0.5_med11_collar0.0|29.37|
49
+ |result_th0.6_med1_collar0.0|30.77|
50
+ |result_th0.6_med11_collar0.0|29.52|
51
+ |result_th0.7_med1_collar0.0|32.60|
52
+ |result_th0.7_med11_collar0.0|31.03|
53
+
54
+ ## DIAR config
55
+
56
+ <details><summary>expand</summary>
57
+
58
+ ```
59
+ config: conf/train_diar.yaml
60
+ print_config: false
61
+ log_level: INFO
62
+ dry_run: false
63
+ iterator_type: chunk
64
+ output_dir: exp/diar_train_diar_raw_max_epoch20
65
+ ngpu: 1
66
+ seed: 0
67
+ num_workers: 1
68
+ num_att_plot: 3
69
+ dist_backend: nccl
70
+ dist_init_method: env://
71
+ dist_world_size: null
72
+ dist_rank: null
73
+ local_rank: 0
74
+ dist_master_addr: null
75
+ dist_master_port: null
76
+ dist_launcher: null
77
+ multiprocessing_distributed: false
78
+ unused_parameters: false
79
+ sharded_ddp: false
80
+ cudnn_enabled: true
81
+ cudnn_benchmark: false
82
+ cudnn_deterministic: true
83
+ collect_stats: false
84
+ write_collected_feats: false
85
+ max_epoch: 20
86
+ patience: 3
87
+ val_scheduler_criterion:
88
+ - valid
89
+ - loss
90
+ early_stopping_criterion:
91
+ - valid
92
+ - loss
93
+ - min
94
+ best_model_criterion:
95
+ - - valid
96
+ - acc
97
+ - max
98
+ keep_nbest_models: 3
99
+ grad_clip: 5
100
+ grad_clip_type: 2.0
101
+ grad_noise: false
102
+ accum_grad: 2
103
+ no_forward_run: false
104
+ resume: true
105
+ train_dtype: float32
106
+ use_amp: false
107
+ log_interval: null
108
+ use_tensorboard: true
109
+ use_wandb: false
110
+ wandb_project: null
111
+ wandb_id: null
112
+ wandb_entity: null
113
+ wandb_name: null
114
+ wandb_model_log_interval: -1
115
+ detect_anomaly: false
116
+ pretrain_path: null
117
+ init_param: []
118
+ ignore_init_mismatch: false
119
+ freeze_param: []
120
+ num_iters_per_epoch: null
121
+ batch_size: 16
122
+ valid_batch_size: null
123
+ batch_bins: 1000000
124
+ valid_batch_bins: null
125
+ train_shape_file:
126
+ - exp/diar_stats_8k/train/speech_shape
127
+ - exp/diar_stats_8k/train/spk_labels_shape
128
+ valid_shape_file:
129
+ - exp/diar_stats_8k/valid/speech_shape
130
+ - exp/diar_stats_8k/valid/spk_labels_shape
131
+ batch_type: folded
132
+ valid_batch_type: null
133
+ fold_length:
134
+ - 80000
135
+ - 800
136
+ sort_in_batch: descending
137
+ sort_batch: descending
138
+ multiple_iterator: false
139
+ chunk_length: 200000
140
+ chunk_shift_ratio: 0.5
141
+ num_cache_chunks: 64
142
+ train_data_path_and_name_and_type:
143
+ - - dump/raw/simu/data/train_clean_5_ns2_beta2_500/wav.scp
144
+ - speech
145
+ - sound
146
+ - - dump/raw/simu/data/train_clean_5_ns2_beta2_500/espnet_rttm
147
+ - spk_labels
148
+ - rttm
149
+ valid_data_path_and_name_and_type:
150
+ - - dump/raw/simu/data/dev_clean_2_ns2_beta2_500/wav.scp
151
+ - speech
152
+ - sound
153
+ - - dump/raw/simu/data/dev_clean_2_ns2_beta2_500/espnet_rttm
154
+ - spk_labels
155
+ - rttm
156
+ allow_variable_data_keys: false
157
+ max_cache_size: 0.0
158
+ max_cache_fd: 32
159
+ valid_max_cache_size: null
160
+ optim: adam
161
+ optim_conf:
162
+ lr: 0.01
163
+ scheduler: noamlr
164
+ scheduler_conf:
165
+ warmup_steps: 1000
166
+ num_spk: 2
167
+ init: xavier_uniform
168
+ input_size: null
169
+ model_conf:
170
+ loss_type: pit
171
+ use_preprocessor: true
172
+ frontend: default
173
+ frontend_conf:
174
+ fs: 8k
175
+ hop_length: 128
176
+ normalize: global_mvn
177
+ normalize_conf:
178
+ stats_file: exp/diar_stats_8k/train/feats_stats.npz
179
+ encoder: transformer
180
+ encoder_conf:
181
+ input_layer: linear
182
+ num_blocks: 2
183
+ linear_units: 512
184
+ dropout_rate: 0.1
185
+ output_size: 256
186
+ attention_heads: 4
187
+ attention_dropout_rate: 0.0
188
+ decoder: linear
189
+ decoder_conf: {}
190
+ label_aggregator: label_aggregator
191
+ label_aggregator_conf: {}
192
+ required:
193
+ - output_dir
194
+ version: 0.10.2a1
195
+ distributed: false
196
+ ```
197
+
198
+ </details>
199
+
200
+
exp/diar_stats_8k/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/diar_train_diar_raw_max_epoch20/19epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c133eb6c494ec800b8be6b6c14aa91e890af7b19488f36423a7d56105987f493
3
+ size 4404388
exp/diar_train_diar_raw_max_epoch20/RESULTS.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_diar_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Aug 25 23:29:07 EDT 2021`
5
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
6
+ - espnet version: `espnet 0.10.2a1`
7
+ - pytorch version: `pytorch 1.9.0+cu102`
8
+ - Git hash: `19bcd34f9395e01e54a97c4db5ecbcedb429dd92`
9
+ - Commit date: `Tue Aug 24 19:50:44 2021 -0400`
10
+
11
+ ## `diar_train_diar_raw_max_epoch20`
12
+ ### DER
13
+ `dev_clean_2_ns2_beta2_500`
14
+
15
+ |threshold_median_collar|DER|
16
+ |---|---|
17
+ |result_th0.3_med1_collar0.0|32.42|
18
+ |result_th0.3_med11_collar0.0|32.03|
19
+ |result_th0.4_med1_collar0.0|30.96|
20
+ |result_th0.4_med11_collar0.0|30.26|
21
+ |result_th0.5_med1_collar0.0|30.35|
22
+ |result_th0.5_med11_collar0.0|29.37|
23
+ |result_th0.6_med1_collar0.0|30.77|
24
+ |result_th0.6_med11_collar0.0|29.52|
25
+ |result_th0.7_med1_collar0.0|32.60|
26
+ |result_th0.7_med11_collar0.0|31.03|
exp/diar_train_diar_raw_max_epoch20/config.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_diar.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/diar_train_diar_raw_max_epoch20
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 20
28
+ patience: 3
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 3
41
+ grad_clip: 5
42
+ grad_clip_type: 2.0
43
+ grad_noise: false
44
+ accum_grad: 2
45
+ no_forward_run: false
46
+ resume: true
47
+ train_dtype: float32
48
+ use_amp: false
49
+ log_interval: null
50
+ use_tensorboard: true
51
+ use_wandb: false
52
+ wandb_project: null
53
+ wandb_id: null
54
+ wandb_entity: null
55
+ wandb_name: null
56
+ wandb_model_log_interval: -1
57
+ detect_anomaly: false
58
+ pretrain_path: null
59
+ init_param: []
60
+ ignore_init_mismatch: false
61
+ freeze_param: []
62
+ num_iters_per_epoch: null
63
+ batch_size: 16
64
+ valid_batch_size: null
65
+ batch_bins: 1000000
66
+ valid_batch_bins: null
67
+ train_shape_file:
68
+ - exp/diar_stats_8k/train/speech_shape
69
+ - exp/diar_stats_8k/train/spk_labels_shape
70
+ valid_shape_file:
71
+ - exp/diar_stats_8k/valid/speech_shape
72
+ - exp/diar_stats_8k/valid/spk_labels_shape
73
+ batch_type: folded
74
+ valid_batch_type: null
75
+ fold_length:
76
+ - 80000
77
+ - 800
78
+ sort_in_batch: descending
79
+ sort_batch: descending
80
+ multiple_iterator: false
81
+ chunk_length: 200000
82
+ chunk_shift_ratio: 0.5
83
+ num_cache_chunks: 64
84
+ train_data_path_and_name_and_type:
85
+ - - dump/raw/simu/data/train_clean_5_ns2_beta2_500/wav.scp
86
+ - speech
87
+ - sound
88
+ - - dump/raw/simu/data/train_clean_5_ns2_beta2_500/espnet_rttm
89
+ - spk_labels
90
+ - rttm
91
+ valid_data_path_and_name_and_type:
92
+ - - dump/raw/simu/data/dev_clean_2_ns2_beta2_500/wav.scp
93
+ - speech
94
+ - sound
95
+ - - dump/raw/simu/data/dev_clean_2_ns2_beta2_500/espnet_rttm
96
+ - spk_labels
97
+ - rttm
98
+ allow_variable_data_keys: false
99
+ max_cache_size: 0.0
100
+ max_cache_fd: 32
101
+ valid_max_cache_size: null
102
+ optim: adam
103
+ optim_conf:
104
+ lr: 0.01
105
+ scheduler: noamlr
106
+ scheduler_conf:
107
+ warmup_steps: 1000
108
+ num_spk: 2
109
+ init: xavier_uniform
110
+ input_size: null
111
+ model_conf:
112
+ loss_type: pit
113
+ use_preprocessor: true
114
+ frontend: default
115
+ frontend_conf:
116
+ fs: 8k
117
+ hop_length: 128
118
+ normalize: global_mvn
119
+ normalize_conf:
120
+ stats_file: exp/diar_stats_8k/train/feats_stats.npz
121
+ encoder: transformer
122
+ encoder_conf:
123
+ input_layer: linear
124
+ num_blocks: 2
125
+ linear_units: 512
126
+ dropout_rate: 0.1
127
+ output_size: 256
128
+ attention_heads: 4
129
+ attention_dropout_rate: 0.0
130
+ decoder: linear
131
+ decoder_conf: {}
132
+ label_aggregator: label_aggregator
133
+ label_aggregator_conf: {}
134
+ required:
135
+ - output_dir
136
+ version: 0.10.2a1
137
+ distributed: false
exp/diar_train_diar_raw_max_epoch20/images/acc.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/backward_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/cf.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/der.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/fa.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/forward_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/gpu_max_cached_mem_GB.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/iter_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/loss.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/mi.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/optim0_lr0.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/optim_step_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/sad_fr.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/sad_mr.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.10.2a1
2
+ files:
3
+ model_file: exp/diar_train_diar_raw_max_epoch20/19epoch.pth
4
+ python: "3.7.11 (default, Jul 27 2021, 14:32:16) \n[GCC 7.5.0]"
5
+ timestamp: 1629948586.911716
6
+ torch: 1.9.0+cu102
7
+ yaml_files:
8
+ train_config: exp/diar_train_diar_raw_max_epoch20/config.yaml