first commit
Browse files- .gitattributes +2 -0
- config.json +3 -0
- embedding_model.ckpt +3 -0
- hyperparams.yaml +64 -0
- projector.ckpt +3 -0
- train_hyp.yaml +252 -0
- train_log.txt +50 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
embedding_model.ckpt filter=lfs diff=lfs merge=lfs -text
|
36 |
+
projector.ckpt filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"speechbrain_interface": "SpeakerRecognition"
|
3 |
+
}
|
embedding_model.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34414582d17cf0b5f9b63e44b46fe1217343b0211b97a0c0b1e7b07da9f3b58f
|
3 |
+
size 84883955
|
hyperparams.yaml
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ############################################################################
|
2 |
+
# Model: ECAPA big for Speaker verification
|
3 |
+
# ############################################################################
|
4 |
+
|
5 |
+
# Feature parameters
|
6 |
+
n_mels: 80
|
7 |
+
|
8 |
+
# Pretrained model location (HuggingFace repo ID)
|
9 |
+
pretrained_path: gorinars/sb-ecapa-vggsound-uvgscl
|
10 |
+
|
11 |
+
# Output parameters
|
12 |
+
out_n_neurons: 308
|
13 |
+
|
14 |
+
# Model params
|
15 |
+
compute_features: !new:speechbrain.lobes.features.Fbank
|
16 |
+
n_mels: 80
|
17 |
+
left_frames: 0
|
18 |
+
right_frames: 0
|
19 |
+
deltas: false
|
20 |
+
sample_rate: 16000
|
21 |
+
n_fft: 400
|
22 |
+
win_length: 25
|
23 |
+
hop_length: 10
|
24 |
+
f_min: 0
|
25 |
+
|
26 |
+
|
27 |
+
mean_var_norm: !new:speechbrain.processing.features.InputNormalization
|
28 |
+
norm_type: sentence
|
29 |
+
std_norm: False
|
30 |
+
|
31 |
+
|
32 |
+
embedding_model: !new:speechbrain.nnet.containers.LengthsCapableSequential
|
33 |
+
input_shape: [null, 1, null]
|
34 |
+
embedding: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
|
35 |
+
input_size: !ref <n_mels>
|
36 |
+
channels: [1024, 1024, 1024, 1024, 3072]
|
37 |
+
kernel_sizes: [5, 3, 3, 3, 1]
|
38 |
+
dilations: [1, 2, 3, 4, 1]
|
39 |
+
groups: [1, 1, 1, 1, 1]
|
40 |
+
attention_channels: 128
|
41 |
+
lin_neurons: 256
|
42 |
+
projector: !new:crytorch.models.components.pann.SimSiamProjector
|
43 |
+
input_size: 256
|
44 |
+
hidden_size: 256
|
45 |
+
output_size: 256
|
46 |
+
norm_type: bn
|
47 |
+
|
48 |
+
modules:
|
49 |
+
compute_features: !ref <compute_features>
|
50 |
+
mean_var_norm: !ref <mean_var_norm>
|
51 |
+
embedding_model: !ref <embedding_model>
|
52 |
+
|
53 |
+
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
|
54 |
+
|
55 |
+
|
56 |
+
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
57 |
+
loadables:
|
58 |
+
embedding: !ref <embedding_model.embedding>
|
59 |
+
projector: !ref <embedding_model.projector>
|
60 |
+
paths:
|
61 |
+
embedding: !ref <pretrained_path>/embedding_model.ckpt
|
62 |
+
projector: !ref <pretrained_path>/projector.ckpt
|
63 |
+
|
64 |
+
|
projector.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c890e8ca36066d9a30090ac1835ac6fafd6f03435dc4bdcfcafd44c64c02cbcf
|
3 |
+
size 538555
|
train_hyp.yaml
ADDED
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Generated 2023-02-12 from:
|
2 |
+
# /home/agorin/cssl_sound/hparams/ecapa_vgg.yaml
|
3 |
+
# yamllint disable
|
4 |
+
# File : supclr_train.yaml
|
5 |
+
# Author : Zhepei Wang <zhepeiw2@illinois.edu>
|
6 |
+
# Date : 27.01.2022
|
7 |
+
# Last Modified Date: 31.03.2022
|
8 |
+
# Last Modified By : Zhepei Wang <zhepeiw2@illinois.edu>
|
9 |
+
|
10 |
+
|
11 |
+
seed: 2022
|
12 |
+
__set_seed: !apply:torch.manual_seed [2022]
|
13 |
+
np_rng: !new:numpy.random.RandomState [2022]
|
14 |
+
|
15 |
+
resume_interrupt: false
|
16 |
+
resume_task_idx: 0
|
17 |
+
balanced_cry: false
|
18 |
+
|
19 |
+
time_stamp: 2023-02-12+21-11-02
|
20 |
+
experiment_name: ecapa_vgg
|
21 |
+
# output_folder: !ref results/<experiment_name>/<seed>
|
22 |
+
output_base: results #/home/agorin/datasets/results_cssl
|
23 |
+
output_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
|
24 |
+
train_log: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
|
25 |
+
save_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
|
26 |
+
|
27 |
+
# Number of classes
|
28 |
+
n_classes: 308
|
29 |
+
num_tasks: 1
|
30 |
+
# Continual learning setup
|
31 |
+
task_classes: &id001 !apply:utils.prepare_task_classes
|
32 |
+
num_classes: 308
|
33 |
+
num_tasks: 1
|
34 |
+
seed: 2022
|
35 |
+
replay_num_keep: 0
|
36 |
+
|
37 |
+
use_mixup: false
|
38 |
+
mixup_alpha: 0.4
|
39 |
+
train_duration: 4.0
|
40 |
+
|
41 |
+
# Training parameters
|
42 |
+
number_of_epochs: 50
|
43 |
+
batch_size: 128
|
44 |
+
# lr: 0.001
|
45 |
+
# base_lr: 0.00000001
|
46 |
+
# max_lr: !ref <lr>
|
47 |
+
# step_size: 65000
|
48 |
+
warmup_epochs: 5
|
49 |
+
warmup_lr: 0.0
|
50 |
+
base_lr: 0.015
|
51 |
+
final_lr: 5e-09
|
52 |
+
|
53 |
+
# dataset
|
54 |
+
sample_rate: 16000
|
55 |
+
|
56 |
+
data_folder: /home/agorin/datasets/VGG-Sound
|
57 |
+
label_encoder_path: ./dataset/label_encoder_vggsound_ordered.txt
|
58 |
+
prepare_split_csv_fn: !name:dataset.prepare_vggsound2.prepare_split
|
59 |
+
root_dir: /home/agorin/datasets/VGG-Sound
|
60 |
+
output_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
|
61 |
+
task_classes: *id001
|
62 |
+
train_split: 0.8
|
63 |
+
seed: 2022
|
64 |
+
|
65 |
+
train_dataloader_opts:
|
66 |
+
batch_size: 128
|
67 |
+
num_workers: 8
|
68 |
+
shuffle: true
|
69 |
+
drop_last: true
|
70 |
+
|
71 |
+
|
72 |
+
valid_dataloader_opts:
|
73 |
+
batch_size: 32
|
74 |
+
num_workers: 8
|
75 |
+
|
76 |
+
|
77 |
+
|
78 |
+
# Experiment params
|
79 |
+
auto_mix_prec: false # Set to true to enable mixed precision
|
80 |
+
|
81 |
+
|
82 |
+
# Feature parameters
|
83 |
+
n_mels: 80
|
84 |
+
left_frames: 0
|
85 |
+
right_frames: 0
|
86 |
+
deltas: false
|
87 |
+
amp_to_db: false
|
88 |
+
normalize: true
|
89 |
+
win_length: 25
|
90 |
+
hop_length: 10
|
91 |
+
n_fft: 400
|
92 |
+
f_min: 0
|
93 |
+
use_time_roll: false
|
94 |
+
use_freq_shift: false
|
95 |
+
emb_dim: 256
|
96 |
+
emb_norm_type: bn
|
97 |
+
proj_norm_type: bn
|
98 |
+
|
99 |
+
# augmentation
|
100 |
+
# time_domain_aug: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
|
101 |
+
# sample_rate: !ref <sample_rate>
|
102 |
+
# # drop_chunk_count_high: 2
|
103 |
+
# # drop_chunk_noise_factor: 0.05
|
104 |
+
# speeds: [90, 95, 100, 105, 110]
|
105 |
+
# drop_freq_count_high: 4
|
106 |
+
# drop_chunk_count_high: 3
|
107 |
+
# # drop_chunk_length_low: 1000
|
108 |
+
# # drop_chunk_length_high: 5000
|
109 |
+
spec_domain_aug: !new:augmentation.TFAugmentation
|
110 |
+
time_warp: true
|
111 |
+
time_warp_window: 8
|
112 |
+
freq_mask: true
|
113 |
+
freq_mask_width: !tuple (0, 10)
|
114 |
+
n_freq_mask: 2
|
115 |
+
time_mask: true
|
116 |
+
time_mask_width: !tuple (0, 10)
|
117 |
+
n_time_mask: 2
|
118 |
+
replace_with_zero: true
|
119 |
+
time_roll: false
|
120 |
+
time_roll_limit: !tuple (0, 200)
|
121 |
+
freq_shift: false
|
122 |
+
freq_shift_limit: !tuple (-10, 10)
|
123 |
+
|
124 |
+
|
125 |
+
# Functions
|
126 |
+
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
|
127 |
+
n_mels: 80
|
128 |
+
left_frames: 0
|
129 |
+
right_frames: 0
|
130 |
+
deltas: false
|
131 |
+
sample_rate: 16000
|
132 |
+
n_fft: 400
|
133 |
+
win_length: 25
|
134 |
+
hop_length: 10
|
135 |
+
f_min: 0
|
136 |
+
|
137 |
+
mean_var_norm: &id007 !new:speechbrain.processing.features.InputNormalization
|
138 |
+
|
139 |
+
norm_type: sentence
|
140 |
+
std_norm: false
|
141 |
+
|
142 |
+
embedding_model: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
|
143 |
+
input_size: 80
|
144 |
+
channels: [1024, 1024, 1024, 1024, 3072]
|
145 |
+
kernel_sizes: [5, 3, 3, 3, 1]
|
146 |
+
dilations: [1, 2, 3, 4, 1]
|
147 |
+
groups: [1, 1, 1, 1, 1]
|
148 |
+
attention_channels: 128
|
149 |
+
lin_neurons: 256
|
150 |
+
|
151 |
+
# embedding_model: !new:models.pann.Cnn14
|
152 |
+
# mel_bins: !ref <n_mels>
|
153 |
+
# emb_dim: !ref <emb_dim>
|
154 |
+
# norm_type: !ref <emb_norm_type>
|
155 |
+
|
156 |
+
projector: &id005 !new:models.modules.SimSiamProjector
|
157 |
+
input_size: 256
|
158 |
+
hidden_size: 256
|
159 |
+
output_size: 256
|
160 |
+
norm_type: bn
|
161 |
+
|
162 |
+
predictor: &id006 !new:models.modules.SimSiamPredictor
|
163 |
+
input_size: 256
|
164 |
+
hidden_size: 128
|
165 |
+
norm_type: bn
|
166 |
+
|
167 |
+
classifier: &id004 !new:models.modules.Classifier
|
168 |
+
input_size: 256
|
169 |
+
output_size: 308
|
170 |
+
|
171 |
+
modules:
|
172 |
+
compute_features: *id002
|
173 |
+
embedding_model: *id003
|
174 |
+
classifier: *id004
|
175 |
+
projector: *id005
|
176 |
+
predictor: *id006
|
177 |
+
mean_var_norm: *id007
|
178 |
+
ssl_weight: 1.
|
179 |
+
compute_simclr_cost: !new:losses.SimCLRLoss
|
180 |
+
tau: 0.5
|
181 |
+
|
182 |
+
sup_weight: 0.
|
183 |
+
compute_sup_cost: !new:losses.LogSoftmaxWithProbWrapper
|
184 |
+
loss_fn: !new:torch.nn.Identity
|
185 |
+
|
186 |
+
dist_weight: 0
|
187 |
+
compute_dist_cost: !new:losses.SimCLRLoss
|
188 |
+
tau: 0.5
|
189 |
+
|
190 |
+
|
191 |
+
acc_metric: !name:speechbrain.utils.Accuracy.AccuracyStats
|
192 |
+
|
193 |
+
# opt_class: !name:torch.optim.Adam
|
194 |
+
# lr: !ref <base_lr>
|
195 |
+
# weight_decay: 0.0005
|
196 |
+
#
|
197 |
+
# lr_scheduler_fn: !name:speechbrain.nnet.schedulers.CyclicLRScheduler
|
198 |
+
# base_lr: !ref <final_lr>
|
199 |
+
# max_lr: !ref <base_lr>
|
200 |
+
# step_size: 888
|
201 |
+
|
202 |
+
opt_class: !name:torch.optim.SGD
|
203 |
+
lr: 0.015
|
204 |
+
weight_decay: 0.0005
|
205 |
+
momentum: 0.9
|
206 |
+
|
207 |
+
lr_scheduler_fn: !name:schedulers.SimSiamCosineScheduler
|
208 |
+
warmup_epochs: 5
|
209 |
+
warmup_lr: 0.0
|
210 |
+
num_epochs: 50
|
211 |
+
base_lr: 0.015
|
212 |
+
final_lr: 5e-09
|
213 |
+
steps_per_epoch: 200
|
214 |
+
constant_predictor_lr: true
|
215 |
+
|
216 |
+
epoch_counter_fn: !name:speechbrain.utils.epoch_loop.EpochCounter
|
217 |
+
limit: 50
|
218 |
+
|
219 |
+
datapoint_counter: &id008 !new:utils.DatapointCounter
|
220 |
+
|
221 |
+
#prev_checkpointer: null
|
222 |
+
#prev_checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
|
223 |
+
# checkpoints_dir: /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save/task0
|
224 |
+
# Logging + checkpoints
|
225 |
+
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
|
226 |
+
checkpoints_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
|
227 |
+
|
228 |
+
recoverables:
|
229 |
+
embedding_model: *id003
|
230 |
+
classifier: *id004
|
231 |
+
projector: *id005
|
232 |
+
predictor: *id006
|
233 |
+
normalizer: *id007
|
234 |
+
datapoint_counter: *id008
|
235 |
+
ssl_checkpoints_dir: # /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save
|
236 |
+
|
237 |
+
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
|
238 |
+
save_file: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
|
239 |
+
|
240 |
+
# wandb
|
241 |
+
use_wandb: false
|
242 |
+
train_log_frequency: 20
|
243 |
+
wandb_logger_fn: !name:utils.MyWandBLogger
|
244 |
+
initializer: !name:wandb.init
|
245 |
+
entity: CAL
|
246 |
+
project: cssl_sound
|
247 |
+
name: 2023-02-12+21-11-02+seed_2022+ecapa_vgg
|
248 |
+
dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
|
249 |
+
reinit: true
|
250 |
+
yaml_config: hparams/vgg/supclr_train.yaml
|
251 |
+
resume: false
|
252 |
+
|
train_log.txt
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch: 1, lr: 3.00e-03, datapoints_seen: 167808 - train loss: 4.13
|
2 |
+
epoch: 2, lr: 5.99e-03, datapoints_seen: 335616 - train loss: 3.95
|
3 |
+
epoch: 3, lr: 8.99e-03, datapoints_seen: 503424 - train loss: 3.90
|
4 |
+
epoch: 4, lr: 1.20e-02, datapoints_seen: 671232 - train loss: 3.87
|
5 |
+
epoch: 5, lr: 1.50e-02, datapoints_seen: 839040 - train loss: 3.85
|
6 |
+
epoch: 6, lr: 1.50e-02, datapoints_seen: 1006848 - train loss: 3.84
|
7 |
+
epoch: 7, lr: 1.49e-02, datapoints_seen: 1174656 - train loss: 3.82
|
8 |
+
epoch: 8, lr: 1.48e-02, datapoints_seen: 1342464 - train loss: 3.82
|
9 |
+
epoch: 9, lr: 1.47e-02, datapoints_seen: 1510272 - train loss: 3.81
|
10 |
+
epoch: 10, lr: 1.45e-02, datapoints_seen: 1678080 - train loss: 3.80
|
11 |
+
epoch: 11, lr: 1.44e-02, datapoints_seen: 1845888 - train loss: 3.80
|
12 |
+
epoch: 12, lr: 1.41e-02, datapoints_seen: 2013696 - train loss: 3.80
|
13 |
+
epoch: 13, lr: 1.39e-02, datapoints_seen: 2181504 - train loss: 3.79
|
14 |
+
epoch: 14, lr: 1.36e-02, datapoints_seen: 2349312 - train loss: 3.79
|
15 |
+
epoch: 15, lr: 1.32e-02, datapoints_seen: 2517120 - train loss: 3.79
|
16 |
+
epoch: 16, lr: 1.29e-02, datapoints_seen: 2684928 - train loss: 3.79
|
17 |
+
epoch: 17, lr: 1.25e-02, datapoints_seen: 2852736 - train loss: 3.78
|
18 |
+
epoch: 18, lr: 1.21e-02, datapoints_seen: 3020544 - train loss: 3.78
|
19 |
+
epoch: 19, lr: 1.17e-02, datapoints_seen: 3188352 - train loss: 3.78
|
20 |
+
epoch: 20, lr: 1.13e-02, datapoints_seen: 3356160 - train loss: 3.78
|
21 |
+
epoch: 21, lr: 1.08e-02, datapoints_seen: 3523968 - train loss: 3.78
|
22 |
+
epoch: 22, lr: 1.03e-02, datapoints_seen: 3691776 - train loss: 3.77
|
23 |
+
epoch: 23, lr: 9.83e-03, datapoints_seen: 3859584 - train loss: 3.77
|
24 |
+
epoch: 24, lr: 9.32e-03, datapoints_seen: 4027392 - train loss: 3.77
|
25 |
+
epoch: 25, lr: 8.81e-03, datapoints_seen: 4195200 - train loss: 3.77
|
26 |
+
epoch: 26, lr: 8.29e-03, datapoints_seen: 4363008 - train loss: 3.77
|
27 |
+
epoch: 27, lr: 7.77e-03, datapoints_seen: 4530816 - train loss: 3.76
|
28 |
+
epoch: 28, lr: 7.25e-03, datapoints_seen: 4698624 - train loss: 3.76
|
29 |
+
epoch: 29, lr: 6.73e-03, datapoints_seen: 4866432 - train loss: 3.76
|
30 |
+
epoch: 30, lr: 6.21e-03, datapoints_seen: 5034240 - train loss: 3.76
|
31 |
+
epoch: 31, lr: 5.70e-03, datapoints_seen: 5202048 - train loss: 3.76
|
32 |
+
epoch: 32, lr: 5.19e-03, datapoints_seen: 5369856 - train loss: 3.75
|
33 |
+
epoch: 33, lr: 4.70e-03, datapoints_seen: 5537664 - train loss: 3.75
|
34 |
+
epoch: 34, lr: 4.22e-03, datapoints_seen: 5705472 - train loss: 3.75
|
35 |
+
epoch: 35, lr: 3.76e-03, datapoints_seen: 5873280 - train loss: 3.75
|
36 |
+
epoch: 36, lr: 3.32e-03, datapoints_seen: 6041088 - train loss: 3.75
|
37 |
+
epoch: 37, lr: 2.89e-03, datapoints_seen: 6208896 - train loss: 3.74
|
38 |
+
epoch: 38, lr: 2.49e-03, datapoints_seen: 6376704 - train loss: 3.74
|
39 |
+
epoch: 39, lr: 2.12e-03, datapoints_seen: 6544512 - train loss: 3.74
|
40 |
+
epoch: 40, lr: 1.77e-03, datapoints_seen: 6712320 - train loss: 3.74
|
41 |
+
epoch: 41, lr: 1.44e-03, datapoints_seen: 6880128 - train loss: 3.73
|
42 |
+
epoch: 42, lr: 1.15e-03, datapoints_seen: 7047936 - train loss: 3.73
|
43 |
+
epoch: 43, lr: 8.86e-04, datapoints_seen: 7215744 - train loss: 3.73
|
44 |
+
epoch: 44, lr: 6.56e-04, datapoints_seen: 7383552 - train loss: 3.73
|
45 |
+
epoch: 45, lr: 4.59e-04, datapoints_seen: 7551360 - train loss: 3.73
|
46 |
+
epoch: 46, lr: 2.96e-04, datapoints_seen: 7719168 - train loss: 3.73
|
47 |
+
epoch: 47, lr: 1.68e-04, datapoints_seen: 7886976 - train loss: 3.73
|
48 |
+
epoch: 48, lr: 7.57e-05, datapoints_seen: 8054784 - train loss: 3.73
|
49 |
+
epoch: 49, lr: 1.97e-05, datapoints_seen: 8222592 - train loss: 3.72
|
50 |
+
epoch: 50, lr: 3.26e-08, datapoints_seen: 8390400 - train loss: 3.72
|