Upload train.log
Browse files
train.log
ADDED
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2022-08-30 17:59:36,720 - INFO - root - Hello! This is Joey-NMT (version 2.0.0).
|
2 |
+
2022-08-30 17:59:36,721 - INFO - joeynmt.helpers - cfg.name : uzbek_kazakh_deen_sp
|
3 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.joeynmt_version : 2.0.0
|
4 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.data.train : /content/drive/MyDrive/uzbek_kazakh/train
|
5 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.data.dev : /content/drive/MyDrive/uzbek_kazakh/validation
|
6 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.data.test : /content/drive/MyDrive/uzbek_kazakh/test
|
7 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.data.dataset_type : huggingface
|
8 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.data.sample_dev_subset : 200
|
9 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.data.src.lang : uz
|
10 |
+
2022-08-30 17:59:36,722 - INFO - joeynmt.helpers - cfg.data.src.max_length : 100
|
11 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.lowercase : False
|
12 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.normalize : False
|
13 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.level : bpe
|
14 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.voc_limit : 10000
|
15 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.voc_min_freq : 1
|
16 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.voc_file : /content/drive/MyDrive/uzbek_kazakh/vocab.txt
|
17 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.tokenizer_type : sentencepiece
|
18 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.src.tokenizer_cfg.model_file : /content/drive/MyDrive/uzbek_kazakh/sp.model
|
19 |
+
2022-08-30 17:59:36,723 - INFO - joeynmt.helpers - cfg.data.trg.lang : kz
|
20 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.max_length : 100
|
21 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.lowercase : False
|
22 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.normalize : False
|
23 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.level : bpe
|
24 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.voc_limit : 10000
|
25 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.voc_min_freq : 1
|
26 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.voc_file : /content/drive/MyDrive/uzbek_kazakh/vocab.txt
|
27 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.tokenizer_type : sentencepiece
|
28 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.data.trg.tokenizer_cfg.model_file : /content/drive/MyDrive/uzbek_kazakh/sp.model
|
29 |
+
2022-08-30 17:59:36,724 - INFO - joeynmt.helpers - cfg.testing.n_best : 1
|
30 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.testing.beam_size : 5
|
31 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.testing.beam_alpha : 1.0
|
32 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.testing.batch_size : 256
|
33 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.testing.batch_type : token
|
34 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.testing.max_output_length : 100
|
35 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.testing.eval_metrics : ['bleu']
|
36 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.testing.sacrebleu_cfg.tokenize : 13a
|
37 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.training.load_model : /content/drive/MyDrive/models/uzbek_kazakh/latest.ckpt
|
38 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.training.reset_best_ckpt : False
|
39 |
+
2022-08-30 17:59:36,725 - INFO - joeynmt.helpers - cfg.training.reset_scheduler : False
|
40 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.reset_optimizer : False
|
41 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.reset_iter_state : False
|
42 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.random_seed : 42
|
43 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.optimizer : adam
|
44 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.normalization : tokens
|
45 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.adam_betas : [0.9, 0.999]
|
46 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.scheduling : warmupinversesquareroot
|
47 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.learning_rate_warmup : 2000
|
48 |
+
2022-08-30 17:59:36,726 - INFO - joeynmt.helpers - cfg.training.learning_rate : 0.0002
|
49 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.learning_rate_min : 1e-08
|
50 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.weight_decay : 0.0
|
51 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.label_smoothing : 0.1
|
52 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.loss : crossentropy
|
53 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.batch_size : 512
|
54 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.batch_type : token
|
55 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.batch_multiplier : 4
|
56 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.early_stopping_metric : bleu
|
57 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.epochs : 10
|
58 |
+
2022-08-30 17:59:36,727 - INFO - joeynmt.helpers - cfg.training.updates : 20000
|
59 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.validation_freq : 1000
|
60 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.logging_freq : 100
|
61 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.model_dir : /content/drive/MyDrive/models/uzbek_kazakh_resume
|
62 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.overwrite : True
|
63 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.shuffle : True
|
64 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.use_cuda : True
|
65 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.print_valid_sents : [0, 1, 2, 3]
|
66 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.training.keep_best_ckpts : 3
|
67 |
+
2022-08-30 17:59:36,728 - INFO - joeynmt.helpers - cfg.model.initializer : xavier
|
68 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.bias_initializer : zeros
|
69 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.init_gain : 1.0
|
70 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.embed_initializer : xavier
|
71 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.embed_init_gain : 1.0
|
72 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.tied_embeddings : True
|
73 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.tied_softmax : True
|
74 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.encoder.type : transformer
|
75 |
+
2022-08-30 17:59:36,729 - INFO - joeynmt.helpers - cfg.model.encoder.num_layers : 6
|
76 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.num_heads : 4
|
77 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.embeddings.embedding_dim : 256
|
78 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.embeddings.scale : True
|
79 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.embeddings.dropout : 0.0
|
80 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.hidden_size : 256
|
81 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.ff_size : 1024
|
82 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.dropout : 0.1
|
83 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.encoder.layer_norm : pre
|
84 |
+
2022-08-30 17:59:36,730 - INFO - joeynmt.helpers - cfg.model.decoder.type : transformer
|
85 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.num_layers : 6
|
86 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.num_heads : 8
|
87 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.embeddings.embedding_dim : 256
|
88 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.embeddings.scale : True
|
89 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.embeddings.dropout : 0.0
|
90 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.hidden_size : 256
|
91 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.ff_size : 1024
|
92 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.dropout : 0.1
|
93 |
+
2022-08-30 17:59:36,731 - INFO - joeynmt.helpers - cfg.model.decoder.layer_norm : pre
|
94 |
+
2022-08-30 17:59:36,747 - INFO - joeynmt.data - Building tokenizer...
|
95 |
+
2022-08-30 17:59:36,794 - INFO - joeynmt.tokenizers - uz tokenizer: SentencePieceTokenizer(level=bpe, lowercase=False, normalize=False, filter_by_length=(-1, 100), pretokenizer=none, tokenizer=SentencePieceProcessor, nbest_size=5, alpha=0.0)
|
96 |
+
2022-08-30 17:59:36,794 - INFO - joeynmt.tokenizers - kz tokenizer: SentencePieceTokenizer(level=bpe, lowercase=False, normalize=False, filter_by_length=(-1, 100), pretokenizer=none, tokenizer=SentencePieceProcessor, nbest_size=5, alpha=0.0)
|
97 |
+
2022-08-30 17:59:36,794 - INFO - joeynmt.data - Loading train set...
|
98 |
+
2022-08-30 17:59:36,878 - INFO - numexpr.utils - NumExpr defaulting to 2 threads.
|
99 |
+
2022-08-30 17:59:37,487 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/train/cache-ffede7c3614c284d.arrow
|
100 |
+
2022-08-30 17:59:37,502 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/train/cache-7f7cccfb7f64fcc4.arrow
|
101 |
+
2022-08-30 17:59:37,508 - INFO - joeynmt.data - Building vocabulary...
|
102 |
+
2022-08-30 17:59:39,022 - INFO - joeynmt.data - Loading dev set...
|
103 |
+
2022-08-30 17:59:39,141 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/validation/cache-9a4ec3d7229a6caf.arrow
|
104 |
+
2022-08-30 17:59:39,251 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/validation/cache-fddace07f2634e75.arrow
|
105 |
+
2022-08-30 17:59:39,253 - INFO - joeynmt.data - Loading test set...
|
106 |
+
2022-08-30 17:59:39,381 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/test/cache-278af25010d8c5bc.arrow
|
107 |
+
2022-08-30 17:59:39,490 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/test/cache-1c46da5c3e39cbdd.arrow
|
108 |
+
2022-08-30 17:59:39,493 - INFO - joeynmt.data - Data loaded.
|
109 |
+
2022-08-30 17:59:39,493 - INFO - joeynmt.helpers - Train dataset: HuggingfaceDataset(len=11023, src_lang=uz, trg_lang=kz, has_trg=True, random_subset=-1, split=train, path=/content/drive/MyDrive/uzbek_kazakh/train)
|
110 |
+
2022-08-30 17:59:39,494 - INFO - joeynmt.helpers - Valid dataset: HuggingfaceDataset(len=1000, src_lang=uz, trg_lang=kz, has_trg=True, random_subset=200, split=validation, path=/content/drive/MyDrive/uzbek_kazakh/validation)
|
111 |
+
2022-08-30 17:59:39,494 - INFO - joeynmt.helpers - Test dataset: HuggingfaceDataset(len=1000, src_lang=uz, trg_lang=kz, has_trg=True, random_subset=-1, split=test, path=/content/drive/MyDrive/uzbek_kazakh/test)
|
112 |
+
2022-08-30 17:59:39,495 - INFO - joeynmt.helpers - First training example:
|
113 |
+
[SRC] ▁— ▁Ha , ▁bun ga ▁shubha ▁yo ‘ q , ▁— ▁dedi ▁kapitan ▁Gul ▁u ning ▁so ‘ zlarini ▁ma ’ qul lab .
|
114 |
+
[TRG] ▁— ▁Ие , ▁оны сында ▁күм ə н ▁жоқ ,— ▁деп ▁капитан ▁Гуль ▁оның ▁сөзін ▁мақұлдады .
|
115 |
+
2022-08-30 17:59:39,495 - INFO - joeynmt.helpers - First 10 Src tokens: (0) <unk> (1) <pad> (2) <s> (3) </s> (4) . (5) , (6) ' (7) ‘ (8) ga (9) ▁
|
116 |
+
2022-08-30 17:59:39,495 - INFO - joeynmt.helpers - First 10 Trg tokens: (0) <unk> (1) <pad> (2) <s> (3) </s> (4) . (5) , (6) ' (7) ‘ (8) ga (9) ▁
|
117 |
+
2022-08-30 17:59:39,495 - INFO - joeynmt.helpers - Number of unique Src tokens (vocab_size): 10000
|
118 |
+
2022-08-30 17:59:39,496 - INFO - joeynmt.helpers - Number of unique Trg tokens (vocab_size): 10000
|
119 |
+
2022-08-30 17:59:39,520 - WARNING - joeynmt.tokenizers - /content/drive/MyDrive/models/uzbek_kazakh_resume/sp.model already exists. Stop copying.
|
120 |
+
2022-08-30 17:59:39,528 - INFO - joeynmt.model - Building an encoder-decoder model...
|
121 |
+
2022-08-30 17:59:39,775 - INFO - joeynmt.model - Enc-dec model built.
|
122 |
+
2022-08-30 17:59:40,616 - DEBUG - tensorflow - Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
|
123 |
+
2022-08-30 17:59:40,720 - DEBUG - h5py._conv - Creating converter from 7 to 5
|
124 |
+
2022-08-30 17:59:40,720 - DEBUG - h5py._conv - Creating converter from 5 to 7
|
125 |
+
2022-08-30 17:59:40,720 - DEBUG - h5py._conv - Creating converter from 7 to 5
|
126 |
+
2022-08-30 17:59:40,721 - DEBUG - h5py._conv - Creating converter from 5 to 7
|
127 |
+
2022-08-30 17:59:41,868 - INFO - joeynmt.model - Total params: 13620224
|
128 |
+
2022-08-30 17:59:41,869 - DEBUG - joeynmt.model - Trainable parameters: ['decoder.layer_norm.bias', 'decoder.layer_norm.weight', 'decoder.layers.0.dec_layer_norm.bias', 'decoder.layers.0.dec_layer_norm.weight', 'decoder.layers.0.feed_forward.layer_norm.bias', 'decoder.layers.0.feed_forward.layer_norm.weight', 'decoder.layers.0.feed_forward.pwff_layer.0.bias', 'decoder.layers.0.feed_forward.pwff_layer.0.weight', 'decoder.layers.0.feed_forward.pwff_layer.3.bias', 'decoder.layers.0.feed_forward.pwff_layer.3.weight', 'decoder.layers.0.src_trg_att.k_layer.bias', 'decoder.layers.0.src_trg_att.k_layer.weight', 'decoder.layers.0.src_trg_att.output_layer.bias', 'decoder.layers.0.src_trg_att.output_layer.weight', 'decoder.layers.0.src_trg_att.q_layer.bias', 'decoder.layers.0.src_trg_att.q_layer.weight', 'decoder.layers.0.src_trg_att.v_layer.bias', 'decoder.layers.0.src_trg_att.v_layer.weight', 'decoder.layers.0.trg_trg_att.k_layer.bias', 'decoder.layers.0.trg_trg_att.k_layer.weight', 'decoder.layers.0.trg_trg_att.output_layer.bias', 'decoder.layers.0.trg_trg_att.output_layer.weight', 'decoder.layers.0.trg_trg_att.q_layer.bias', 'decoder.layers.0.trg_trg_att.q_layer.weight', 'decoder.layers.0.trg_trg_att.v_layer.bias', 'decoder.layers.0.trg_trg_att.v_layer.weight', 'decoder.layers.0.x_layer_norm.bias', 'decoder.layers.0.x_layer_norm.weight', 'decoder.layers.1.dec_layer_norm.bias', 'decoder.layers.1.dec_layer_norm.weight', 'decoder.layers.1.feed_forward.layer_norm.bias', 'decoder.layers.1.feed_forward.layer_norm.weight', 'decoder.layers.1.feed_forward.pwff_layer.0.bias', 'decoder.layers.1.feed_forward.pwff_layer.0.weight', 'decoder.layers.1.feed_forward.pwff_layer.3.bias', 'decoder.layers.1.feed_forward.pwff_layer.3.weight', 'decoder.layers.1.src_trg_att.k_layer.bias', 'decoder.layers.1.src_trg_att.k_layer.weight', 'decoder.layers.1.src_trg_att.output_layer.bias', 'decoder.layers.1.src_trg_att.output_layer.weight', 'decoder.layers.1.src_trg_att.q_layer.bias', 'decoder.layers.1.src_trg_att.q_layer.weight', 'decoder.layers.1.src_trg_att.v_layer.bias', 'decoder.layers.1.src_trg_att.v_layer.weight', 'decoder.layers.1.trg_trg_att.k_layer.bias', 'decoder.layers.1.trg_trg_att.k_layer.weight', 'decoder.layers.1.trg_trg_att.output_layer.bias', 'decoder.layers.1.trg_trg_att.output_layer.weight', 'decoder.layers.1.trg_trg_att.q_layer.bias', 'decoder.layers.1.trg_trg_att.q_layer.weight', 'decoder.layers.1.trg_trg_att.v_layer.bias', 'decoder.layers.1.trg_trg_att.v_layer.weight', 'decoder.layers.1.x_layer_norm.bias', 'decoder.layers.1.x_layer_norm.weight', 'decoder.layers.2.dec_layer_norm.bias', 'decoder.layers.2.dec_layer_norm.weight', 'decoder.layers.2.feed_forward.layer_norm.bias', 'decoder.layers.2.feed_forward.layer_norm.weight', 'decoder.layers.2.feed_forward.pwff_layer.0.bias', 'decoder.layers.2.feed_forward.pwff_layer.0.weight', 'decoder.layers.2.feed_forward.pwff_layer.3.bias', 'decoder.layers.2.feed_forward.pwff_layer.3.weight', 'decoder.layers.2.src_trg_att.k_layer.bias', 'decoder.layers.2.src_trg_att.k_layer.weight', 'decoder.layers.2.src_trg_att.output_layer.bias', 'decoder.layers.2.src_trg_att.output_layer.weight', 'decoder.layers.2.src_trg_att.q_layer.bias', 'decoder.layers.2.src_trg_att.q_layer.weight', 'decoder.layers.2.src_trg_att.v_layer.bias', 'decoder.layers.2.src_trg_att.v_layer.weight', 'decoder.layers.2.trg_trg_att.k_layer.bias', 'decoder.layers.2.trg_trg_att.k_layer.weight', 'decoder.layers.2.trg_trg_att.output_layer.bias', 'decoder.layers.2.trg_trg_att.output_layer.weight', 'decoder.layers.2.trg_trg_att.q_layer.bias', 'decoder.layers.2.trg_trg_att.q_layer.weight', 'decoder.layers.2.trg_trg_att.v_layer.bias', 'decoder.layers.2.trg_trg_att.v_layer.weight', 'decoder.layers.2.x_layer_norm.bias', 'decoder.layers.2.x_layer_norm.weight', 'decoder.layers.3.dec_layer_norm.bias', 'decoder.layers.3.dec_layer_norm.weight', 'decoder.layers.3.feed_forward.layer_norm.bias', 'decoder.layers.3.feed_forward.layer_norm.weight', 'decoder.layers.3.feed_forward.pwff_layer.0.bias', 'decoder.layers.3.feed_forward.pwff_layer.0.weight', 'decoder.layers.3.feed_forward.pwff_layer.3.bias', 'decoder.layers.3.feed_forward.pwff_layer.3.weight', 'decoder.layers.3.src_trg_att.k_layer.bias', 'decoder.layers.3.src_trg_att.k_layer.weight', 'decoder.layers.3.src_trg_att.output_layer.bias', 'decoder.layers.3.src_trg_att.output_layer.weight', 'decoder.layers.3.src_trg_att.q_layer.bias', 'decoder.layers.3.src_trg_att.q_layer.weight', 'decoder.layers.3.src_trg_att.v_layer.bias', 'decoder.layers.3.src_trg_att.v_layer.weight', 'decoder.layers.3.trg_trg_att.k_layer.bias', 'decoder.layers.3.trg_trg_att.k_layer.weight', 'decoder.layers.3.trg_trg_att.output_layer.bias', 'decoder.layers.3.trg_trg_att.output_layer.weight', 'decoder.layers.3.trg_trg_att.q_layer.bias', 'decoder.layers.3.trg_trg_att.q_layer.weight', 'decoder.layers.3.trg_trg_att.v_layer.bias', 'decoder.layers.3.trg_trg_att.v_layer.weight', 'decoder.layers.3.x_layer_norm.bias', 'decoder.layers.3.x_layer_norm.weight', 'decoder.layers.4.dec_layer_norm.bias', 'decoder.layers.4.dec_layer_norm.weight', 'decoder.layers.4.feed_forward.layer_norm.bias', 'decoder.layers.4.feed_forward.layer_norm.weight', 'decoder.layers.4.feed_forward.pwff_layer.0.bias', 'decoder.layers.4.feed_forward.pwff_layer.0.weight', 'decoder.layers.4.feed_forward.pwff_layer.3.bias', 'decoder.layers.4.feed_forward.pwff_layer.3.weight', 'decoder.layers.4.src_trg_att.k_layer.bias', 'decoder.layers.4.src_trg_att.k_layer.weight', 'decoder.layers.4.src_trg_att.output_layer.bias', 'decoder.layers.4.src_trg_att.output_layer.weight', 'decoder.layers.4.src_trg_att.q_layer.bias', 'decoder.layers.4.src_trg_att.q_layer.weight', 'decoder.layers.4.src_trg_att.v_layer.bias', 'decoder.layers.4.src_trg_att.v_layer.weight', 'decoder.layers.4.trg_trg_att.k_layer.bias', 'decoder.layers.4.trg_trg_att.k_layer.weight', 'decoder.layers.4.trg_trg_att.output_layer.bias', 'decoder.layers.4.trg_trg_att.output_layer.weight', 'decoder.layers.4.trg_trg_att.q_layer.bias', 'decoder.layers.4.trg_trg_att.q_layer.weight', 'decoder.layers.4.trg_trg_att.v_layer.bias', 'decoder.layers.4.trg_trg_att.v_layer.weight', 'decoder.layers.4.x_layer_norm.bias', 'decoder.layers.4.x_layer_norm.weight', 'decoder.layers.5.dec_layer_norm.bias', 'decoder.layers.5.dec_layer_norm.weight', 'decoder.layers.5.feed_forward.layer_norm.bias', 'decoder.layers.5.feed_forward.layer_norm.weight', 'decoder.layers.5.feed_forward.pwff_layer.0.bias', 'decoder.layers.5.feed_forward.pwff_layer.0.weight', 'decoder.layers.5.feed_forward.pwff_layer.3.bias', 'decoder.layers.5.feed_forward.pwff_layer.3.weight', 'decoder.layers.5.src_trg_att.k_layer.bias', 'decoder.layers.5.src_trg_att.k_layer.weight', 'decoder.layers.5.src_trg_att.output_layer.bias', 'decoder.layers.5.src_trg_att.output_layer.weight', 'decoder.layers.5.src_trg_att.q_layer.bias', 'decoder.layers.5.src_trg_att.q_layer.weight', 'decoder.layers.5.src_trg_att.v_layer.bias', 'decoder.layers.5.src_trg_att.v_layer.weight', 'decoder.layers.5.trg_trg_att.k_layer.bias', 'decoder.layers.5.trg_trg_att.k_layer.weight', 'decoder.layers.5.trg_trg_att.output_layer.bias', 'decoder.layers.5.trg_trg_att.output_layer.weight', 'decoder.layers.5.trg_trg_att.q_layer.bias', 'decoder.layers.5.trg_trg_att.q_layer.weight', 'decoder.layers.5.trg_trg_att.v_layer.bias', 'decoder.layers.5.trg_trg_att.v_layer.weight', 'decoder.layers.5.x_layer_norm.bias', 'decoder.layers.5.x_layer_norm.weight', 'encoder.layer_norm.bias', 'encoder.layer_norm.weight', 'encoder.layers.0.feed_forward.layer_norm.bias', 'encoder.layers.0.feed_forward.layer_norm.weight', 'encoder.layers.0.feed_forward.pwff_layer.0.bias', 'encoder.layers.0.feed_forward.pwff_layer.0.weight', 'encoder.layers.0.feed_forward.pwff_layer.3.bias', 'encoder.layers.0.feed_forward.pwff_layer.3.weight', 'encoder.layers.0.layer_norm.bias', 'encoder.layers.0.layer_norm.weight', 'encoder.layers.0.src_src_att.k_layer.bias', 'encoder.layers.0.src_src_att.k_layer.weight', 'encoder.layers.0.src_src_att.output_layer.bias', 'encoder.layers.0.src_src_att.output_layer.weight', 'encoder.layers.0.src_src_att.q_layer.bias', 'encoder.layers.0.src_src_att.q_layer.weight', 'encoder.layers.0.src_src_att.v_layer.bias', 'encoder.layers.0.src_src_att.v_layer.weight', 'encoder.layers.1.feed_forward.layer_norm.bias', 'encoder.layers.1.feed_forward.layer_norm.weight', 'encoder.layers.1.feed_forward.pwff_layer.0.bias', 'encoder.layers.1.feed_forward.pwff_layer.0.weight', 'encoder.layers.1.feed_forward.pwff_layer.3.bias', 'encoder.layers.1.feed_forward.pwff_layer.3.weight', 'encoder.layers.1.layer_norm.bias', 'encoder.layers.1.layer_norm.weight', 'encoder.layers.1.src_src_att.k_layer.bias', 'encoder.layers.1.src_src_att.k_layer.weight', 'encoder.layers.1.src_src_att.output_layer.bias', 'encoder.layers.1.src_src_att.output_layer.weight', 'encoder.layers.1.src_src_att.q_layer.bias', 'encoder.layers.1.src_src_att.q_layer.weight', 'encoder.layers.1.src_src_att.v_layer.bias', 'encoder.layers.1.src_src_att.v_layer.weight', 'encoder.layers.2.feed_forward.layer_norm.bias', 'encoder.layers.2.feed_forward.layer_norm.weight', 'encoder.layers.2.feed_forward.pwff_layer.0.bias', 'encoder.layers.2.feed_forward.pwff_layer.0.weight', 'encoder.layers.2.feed_forward.pwff_layer.3.bias', 'encoder.layers.2.feed_forward.pwff_layer.3.weight', 'encoder.layers.2.layer_norm.bias', 'encoder.layers.2.layer_norm.weight', 'encoder.layers.2.src_src_att.k_layer.bias', 'encoder.layers.2.src_src_att.k_layer.weight', 'encoder.layers.2.src_src_att.output_layer.bias', 'encoder.layers.2.src_src_att.output_layer.weight', 'encoder.layers.2.src_src_att.q_layer.bias', 'encoder.layers.2.src_src_att.q_layer.weight', 'encoder.layers.2.src_src_att.v_layer.bias', 'encoder.layers.2.src_src_att.v_layer.weight', 'encoder.layers.3.feed_forward.layer_norm.bias', 'encoder.layers.3.feed_forward.layer_norm.weight', 'encoder.layers.3.feed_forward.pwff_layer.0.bias', 'encoder.layers.3.feed_forward.pwff_layer.0.weight', 'encoder.layers.3.feed_forward.pwff_layer.3.bias', 'encoder.layers.3.feed_forward.pwff_layer.3.weight', 'encoder.layers.3.layer_norm.bias', 'encoder.layers.3.layer_norm.weight', 'encoder.layers.3.src_src_att.k_layer.bias', 'encoder.layers.3.src_src_att.k_layer.weight', 'encoder.layers.3.src_src_att.output_layer.bias', 'encoder.layers.3.src_src_att.output_layer.weight', 'encoder.layers.3.src_src_att.q_layer.bias', 'encoder.layers.3.src_src_att.q_layer.weight', 'encoder.layers.3.src_src_att.v_layer.bias', 'encoder.layers.3.src_src_att.v_layer.weight', 'encoder.layers.4.feed_forward.layer_norm.bias', 'encoder.layers.4.feed_forward.layer_norm.weight', 'encoder.layers.4.feed_forward.pwff_layer.0.bias', 'encoder.layers.4.feed_forward.pwff_layer.0.weight', 'encoder.layers.4.feed_forward.pwff_layer.3.bias', 'encoder.layers.4.feed_forward.pwff_layer.3.weight', 'encoder.layers.4.layer_norm.bias', 'encoder.layers.4.layer_norm.weight', 'encoder.layers.4.src_src_att.k_layer.bias', 'encoder.layers.4.src_src_att.k_layer.weight', 'encoder.layers.4.src_src_att.output_layer.bias', 'encoder.layers.4.src_src_att.output_layer.weight', 'encoder.layers.4.src_src_att.q_layer.bias', 'encoder.layers.4.src_src_att.q_layer.weight', 'encoder.layers.4.src_src_att.v_layer.bias', 'encoder.layers.4.src_src_att.v_layer.weight', 'encoder.layers.5.feed_forward.layer_norm.bias', 'encoder.layers.5.feed_forward.layer_norm.weight', 'encoder.layers.5.feed_forward.pwff_layer.0.bias', 'encoder.layers.5.feed_forward.pwff_layer.0.weight', 'encoder.layers.5.feed_forward.pwff_layer.3.bias', 'encoder.layers.5.feed_forward.pwff_layer.3.weight', 'encoder.layers.5.layer_norm.bias', 'encoder.layers.5.layer_norm.weight', 'encoder.layers.5.src_src_att.k_layer.bias', 'encoder.layers.5.src_src_att.k_layer.weight', 'encoder.layers.5.src_src_att.output_layer.bias', 'encoder.layers.5.src_src_att.output_layer.weight', 'encoder.layers.5.src_src_att.q_layer.bias', 'encoder.layers.5.src_src_att.q_layer.weight', 'encoder.layers.5.src_src_att.v_layer.bias', 'encoder.layers.5.src_src_att.v_layer.weight', 'src_embed.lut.weight']
|
129 |
+
2022-08-30 17:59:41,870 - INFO - joeynmt.training - Model(
|
130 |
+
encoder=TransformerEncoder(num_layers=6, num_heads=4, alpha=1.0, layer_norm="pre"),
|
131 |
+
decoder=TransformerDecoder(num_layers=6, num_heads=8, alpha=1.0, layer_norm="pre"),
|
132 |
+
src_embed=Embeddings(embedding_dim=256, vocab_size=10000),
|
133 |
+
trg_embed=Embeddings(embedding_dim=256, vocab_size=10000),
|
134 |
+
loss_function=XentLoss(criterion=KLDivLoss(), smoothing=0.1))
|
135 |
+
2022-08-30 17:59:44,300 - INFO - joeynmt.builders - Adam(lr=0.0002, weight_decay=0.0, betas=[0.9, 0.999])
|
136 |
+
2022-08-30 17:59:44,301 - INFO - joeynmt.builders - WarmupInverseSquareRootScheduler(warmup=2000, decay_rate=0.008944, peak_rate=0.0002, min_rate=1e-08)
|
137 |
+
2022-08-30 17:59:44,301 - INFO - joeynmt.training - Loading model from /content/drive/MyDrive/models/uzbek_kazakh/latest.ckpt
|
138 |
+
2022-08-30 17:59:44,878 - INFO - joeynmt.helpers - Load model from /content/drive/MyDrive/models/uzbek_kazakh/2000.ckpt.
|
139 |
+
2022-08-30 17:59:44,945 - INFO - joeynmt.training - Train stats:
|
140 |
+
device: cuda
|
141 |
+
n_gpu: 1
|
142 |
+
16-bits training: False
|
143 |
+
gradient accumulation: 4
|
144 |
+
batch size per device: 512
|
145 |
+
effective batch size (w. parallel & accumulation): 2048
|
146 |
+
2022-08-30 17:59:44,946 - INFO - joeynmt.training - EPOCH 1
|
147 |
+
2022-08-30 18:00:07,329 - INFO - joeynmt.training - Epoch 1, Step: 2100, Batch Loss: 4.750770, Batch Acc: 0.002161, Tokens per Sec: 4260, Lr: 0.000195
|
148 |
+
2022-08-30 18:00:29,497 - INFO - joeynmt.training - Epoch 1, Step: 2200, Batch Loss: 4.580983, Batch Acc: 0.001989, Tokens per Sec: 4308, Lr: 0.000191
|
149 |
+
2022-08-30 18:00:36,520 - INFO - joeynmt.training - Epoch 1: total training loss 1056.63
|
150 |
+
2022-08-30 18:00:36,521 - INFO - joeynmt.training - EPOCH 2
|
151 |
+
2022-08-30 18:00:51,248 - INFO - joeynmt.training - Epoch 2, Step: 2300, Batch Loss: 4.386467, Batch Acc: 0.003542, Tokens per Sec: 4486, Lr: 0.000187
|
152 |
+
2022-08-30 18:01:13,165 - INFO - joeynmt.training - Epoch 2, Step: 2400, Batch Loss: 4.243957, Batch Acc: 0.002264, Tokens per Sec: 4374, Lr: 0.000183
|
153 |
+
2022-08-30 18:01:26,792 - INFO - joeynmt.training - Epoch 2: total training loss 998.63
|
154 |
+
2022-08-30 18:01:26,793 - INFO - joeynmt.training - EPOCH 3
|
155 |
+
2022-08-30 18:01:35,187 - INFO - joeynmt.training - Epoch 3, Step: 2500, Batch Loss: 4.296048, Batch Acc: 0.006819, Tokens per Sec: 4369, Lr: 0.000179
|
156 |
+
2022-08-30 18:01:58,181 - INFO - joeynmt.training - Epoch 3, Step: 2600, Batch Loss: 4.201078, Batch Acc: 0.002366, Tokens per Sec: 4154, Lr: 0.000175
|
157 |
+
2022-08-30 18:02:19,086 - INFO - joeynmt.training - Epoch 3: total training loss 963.59
|
158 |
+
2022-08-30 18:02:19,086 - INFO - joeynmt.training - EPOCH 4
|
159 |
+
2022-08-30 18:02:20,414 - INFO - joeynmt.training - Epoch 4, Step: 2700, Batch Loss: 3.954871, Batch Acc: 0.047978, Tokens per Sec: 4400, Lr: 0.000172
|
160 |
+
2022-08-30 18:02:42,443 - INFO - joeynmt.training - Epoch 4, Step: 2800, Batch Loss: 3.980452, Batch Acc: 0.002472, Tokens per Sec: 4353, Lr: 0.000169
|
161 |
+
2022-08-30 18:03:04,512 - INFO - joeynmt.training - Epoch 4, Step: 2900, Batch Loss: 4.110939, Batch Acc: 0.002877, Tokens per Sec: 4332, Lr: 0.000166
|
162 |
+
2022-08-30 18:03:10,138 - INFO - joeynmt.training - Epoch 4: total training loss 918.51
|
163 |
+
2022-08-30 18:03:10,139 - INFO - joeynmt.training - EPOCH 5
|
164 |
+
2022-08-30 18:03:26,707 - INFO - joeynmt.training - Epoch 5, Step: 3000, Batch Loss: 3.754799, Batch Acc: 0.003905, Tokens per Sec: 4313, Lr: 0.000163
|
165 |
+
2022-08-30 18:03:26,830 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/validation/cache-1a70e8c13f8ab7d1.arrow
|
166 |
+
2022-08-30 18:03:26,957 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/validation/cache-92f58eebd58af534.arrow
|
167 |
+
2022-08-30 18:03:26,974 - INFO - joeynmt.training - Sample random subset from dev set: n=200, seed=3000
|
168 |
+
2022-08-30 18:03:26,974 - INFO - joeynmt.prediction - Predicting 200 example(s)... (Greedy decoding with min_output_length=1, max_output_length=100, return_prob='none', generate_unk=True, repetition_penalty=-1, no_repeat_ngram_size=-1)
|
169 |
+
2022-08-30 18:03:37,415 - INFO - joeynmt.metrics - nrefs:1|case:mixed|eff:no|tok:13a|smooth:exp|version:2.2.0
|
170 |
+
2022-08-30 18:03:37,415 - INFO - joeynmt.prediction - Evaluation result (greedy) bleu: 1.37, loss: 4.47, ppl: 87.48, acc: 0.21, generation: 10.4094[sec], evaluation: 0.0248[sec]
|
171 |
+
2022-08-30 18:03:37,416 - INFO - joeynmt.training - Hooray! New best validation result [bleu]!
|
172 |
+
2022-08-30 18:03:38,130 - INFO - joeynmt.training - Example #0
|
173 |
+
2022-08-30 18:03:38,131 - DEBUG - joeynmt.training - Tokenized source: ['▁Ikki', 'ga', '▁besh', '▁qo', "'", 'sh', 'sak', '▁-', '▁yetti', '...']
|
174 |
+
2022-08-30 18:03:38,131 - DEBUG - joeynmt.training - Tokenized reference: ['▁Екі', 'ге', '▁бес', 'ті', '▁қос', 'сақ', '▁–', '▁жеті', '...']
|
175 |
+
2022-08-30 18:03:38,131 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁Жолаушылар', '▁су', 'ға', '▁болады', '▁—', '▁', 'ү', 'л', 'бе', 'ді', '...', '</s>']
|
176 |
+
2022-08-30 18:03:38,133 - INFO - joeynmt.training - Source: Ikkiga besh qo'shsak - yetti...
|
177 |
+
2022-08-30 18:03:38,134 - INFO - joeynmt.training - Reference: Екіге бесті қоссақ – жеті...
|
178 |
+
2022-08-30 18:03:38,134 - INFO - joeynmt.training - Hypothesis: Жолаушылар суға болады — үлбеді...
|
179 |
+
2022-08-30 18:03:38,134 - INFO - joeynmt.training - Example #1
|
180 |
+
2022-08-30 18:03:38,135 - DEBUG - joeynmt.training - Tokenized source: ['▁—', '▁Odamlar', '▁meni', '▁ta', 'b', 'ri', 'k', 'lab', ',', '▁o', 'l', 'qish', 'lash', 'sa', ',', '▁ta', '’', 'zim', '▁qilish', 'im', '▁kerak', '.']
|
181 |
+
2022-08-30 18:03:38,135 - DEBUG - joeynmt.training - Tokenized reference: ['▁–', '▁Ж', 'ұ', 'р', 'т', '▁мені', '▁құттықта', 'п', ',', '▁қол', '▁шап', 'ал', 'ақ', 'тағанда', ',', '▁мен', '▁иіліп', '▁ізет', '▁көрсету', 'ім', '▁керек', '.']
|
182 |
+
2022-08-30 18:03:38,135 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁—', '▁Мен', '▁де', '▁бір', 'ер', 'м', ',', '▁құдай', 'дан', '▁да', ',', '▁құдай', 'ға', '▁да', '▁да', '▁', 'ұ', 'м', 'ға', 'р', 'уға', '▁тиіс', '.', '</s>']
|
183 |
+
2022-08-30 18:03:38,136 - INFO - joeynmt.training - Source: — Odamlar meni tabriklab, olqishlashsa, ta’zim qilishim kerak.
|
184 |
+
2022-08-30 18:03:38,137 - INFO - joeynmt.training - Reference: – Жұрт мені құттықтап, қол шапалақтағанда, мен иіліп ізет көрсетуім керек.
|
185 |
+
2022-08-30 18:03:38,137 - INFO - joeynmt.training - Hypothesis: — Мен де бірерм, құдайдан да, құдайға да да ұмғаруға тиіс.
|
186 |
+
2022-08-30 18:03:38,137 - INFO - joeynmt.training - Example #2
|
187 |
+
2022-08-30 18:03:38,137 - DEBUG - joeynmt.training - Tokenized source: ['▁-', '▁Me', 'ning', '▁boshqa', '▁g', 'u', 'lim', '▁bor', '.']
|
188 |
+
2022-08-30 18:03:38,138 - DEBUG - joeynmt.training - Tokenized reference: ['▁-', '▁Мен', 'де', '▁тағы', '▁', 'г', 'үл', '▁бар', '.']
|
189 |
+
2022-08-30 18:03:38,138 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁—', '▁Мен', '▁мен', '▁де', '▁де', '▁де', '▁бар', '.', '</s>']
|
190 |
+
2022-08-30 18:03:38,139 - INFO - joeynmt.training - Source: - Mening boshqa gulim bor.
|
191 |
+
2022-08-30 18:03:38,139 - INFO - joeynmt.training - Reference: - Менде тағы гүл бар.
|
192 |
+
2022-08-30 18:03:38,139 - INFO - joeynmt.training - Hypothesis: — Мен мен де де де бар.
|
193 |
+
2022-08-30 18:03:38,139 - INFO - joeynmt.training - Example #3
|
194 |
+
2022-08-30 18:03:38,140 - DEBUG - joeynmt.training - Tokenized source: ['▁-', '▁Keyin', '▁o', "'", 'zing', 'ni', '▁hukm', '▁qila', 'san', ',', '▁-', '▁de', 'b', '▁javob', '▁berdi', '▁podshoh', '.']
|
195 |
+
2022-08-30 18:03:38,140 - DEBUG - joeynmt.training - Tokenized reference: ['▁-', '▁Онда', '▁сен', '▁өзің', 'ді', '-', 'ө', 'з', 'ің', '▁сотта', 'й', 'сың', ',', '▁-', '▁деп', '▁жауап', '▁берді', '▁патша', '.']
|
196 |
+
2022-08-30 18:03:38,140 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁—', '▁Мен', '▁менің', '▁менің', '▁менің', '▁менің', '▁жауап', '▁қатты', '▁жауап', '▁қатты', '▁жауап', '▁қатты', '.', '</s>']
|
197 |
+
2022-08-30 18:03:38,141 - INFO - joeynmt.training - Source: - Keyin o'zingni hukm qilasan, - deb javob berdi podshoh.
|
198 |
+
2022-08-30 18:03:38,142 - INFO - joeynmt.training - Reference: - Онда сен өзіңді-өзің соттайсың, - деп жауап берді патша.
|
199 |
+
2022-08-30 18:03:38,142 - INFO - joeynmt.training - Hypothesis: — Мен менің менің менің менің жауап қатты жауап қатты жауап қатты.
|
200 |
+
2022-08-30 18:04:02,226 - INFO - joeynmt.training - Epoch 5, Step: 3100, Batch Loss: 3.828526, Batch Acc: 0.002856, Tokens per Sec: 3826, Lr: 0.000161
|
201 |
+
2022-08-30 18:04:14,876 - INFO - joeynmt.training - Epoch 5: total training loss 884.29
|
202 |
+
2022-08-30 18:04:14,876 - INFO - joeynmt.training - EPOCH 6
|
203 |
+
2022-08-30 18:04:24,473 - INFO - joeynmt.training - Epoch 6, Step: 3200, Batch Loss: 3.779200, Batch Acc: 0.006645, Tokens per Sec: 4266, Lr: 0.000158
|
204 |
+
2022-08-30 18:04:46,547 - INFO - joeynmt.training - Epoch 6, Step: 3300, Batch Loss: 3.653367, Batch Acc: 0.003182, Tokens per Sec: 4329, Lr: 0.000156
|
205 |
+
2022-08-30 18:05:06,475 - INFO - joeynmt.training - Epoch 6: total training loss 848.62
|
206 |
+
2022-08-30 18:05:06,475 - INFO - joeynmt.training - EPOCH 7
|
207 |
+
2022-08-30 18:05:08,841 - INFO - joeynmt.training - Epoch 7, Step: 3400, Batch Loss: 3.523787, Batch Acc: 0.032159, Tokens per Sec: 4525, Lr: 0.000153
|
208 |
+
2022-08-30 18:05:31,084 - INFO - joeynmt.training - Epoch 7, Step: 3500, Batch Loss: 3.398022, Batch Acc: 0.003470, Tokens per Sec: 4314, Lr: 0.000151
|
209 |
+
2022-08-30 18:05:53,281 - INFO - joeynmt.training - Epoch 7, Step: 3600, Batch Loss: 3.536128, Batch Acc: 0.003504, Tokens per Sec: 4295, Lr: 0.000149
|
210 |
+
2022-08-30 18:05:57,868 - INFO - joeynmt.training - Epoch 7: total training loss 813.25
|
211 |
+
2022-08-30 18:05:57,869 - INFO - joeynmt.training - EPOCH 8
|
212 |
+
2022-08-30 18:06:16,504 - INFO - joeynmt.training - Epoch 8, Step: 3700, Batch Loss: 3.461900, Batch Acc: 0.004777, Tokens per Sec: 4044, Lr: 0.000147
|
213 |
+
2022-08-30 18:06:38,665 - INFO - joeynmt.training - Epoch 8, Step: 3800, Batch Loss: 3.559606, Batch Acc: 0.003145, Tokens per Sec: 4319, Lr: 0.000145
|
214 |
+
2022-08-30 18:06:50,213 - INFO - joeynmt.training - Epoch 8: total training loss 778.20
|
215 |
+
2022-08-30 18:06:50,213 - INFO - joeynmt.training - EPOCH 9
|
216 |
+
2022-08-30 18:07:00,919 - INFO - joeynmt.training - Epoch 9, Step: 3900, Batch Loss: 3.354622, Batch Acc: 0.008055, Tokens per Sec: 4303, Lr: 0.000143
|
217 |
+
2022-08-30 18:07:23,151 - INFO - joeynmt.training - Epoch 9, Step: 4000, Batch Loss: 3.341566, Batch Acc: 0.003559, Tokens per Sec: 4348, Lr: 0.000141
|
218 |
+
2022-08-30 18:07:23,531 - INFO - joeynmt.training - Sample random subset from dev set: n=200, seed=4000
|
219 |
+
2022-08-30 18:07:23,531 - INFO - joeynmt.prediction - Predicting 200 example(s)... (Greedy decoding with min_output_length=1, max_output_length=100, return_prob='none', generate_unk=True, repetition_penalty=-1, no_repeat_ngram_size=-1)
|
220 |
+
2022-08-30 18:07:28,791 - INFO - joeynmt.metrics - nrefs:1|case:mixed|eff:no|tok:13a|smooth:exp|version:2.2.0
|
221 |
+
2022-08-30 18:07:28,792 - INFO - joeynmt.prediction - Evaluation result (greedy) bleu: 1.63, loss: 4.42, ppl: 83.09, acc: 0.23, generation: 5.2333[sec], evaluation: 0.0208[sec]
|
222 |
+
2022-08-30 18:07:28,792 - INFO - joeynmt.training - Hooray! New best validation result [bleu]!
|
223 |
+
2022-08-30 18:07:29,519 - INFO - joeynmt.training - Example #0
|
224 |
+
2022-08-30 18:07:29,520 - DEBUG - joeynmt.training - Tokenized source: ['▁–', '▁dedi', '▁podshoh', ',', '▁shekilli', ',', '▁me', 'ning', '▁sa', 'y', 'yo', 'ra', 'm', 'ning', '▁bir', '▁burchagi', 'da', '▁qari', '▁ot', 'j', 'al', 'man', '▁bor', '.']
|
225 |
+
2022-08-30 18:07:29,520 - DEBUG - joeynmt.training - Tokenized reference: ['▁–', '▁деді', '▁патша', ',', '▁менің', '▁п', 'ла', 'не', 'та', 'м', 'ның', '▁бір', '▁бұрыш', 'ында', '▁к', 'ә', 'рі', '▁ат', 'жал', 'ма', 'н', '▁бар', '▁сияқты', '.']
|
226 |
+
2022-08-30 18:07:29,520 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁-', '▁деді', '▁король', ',', '▁жас', 'ым', 'ның', '▁бір', 'ер', 'ер', '▁ə', 'р', 'кі', 'гі', '▁бір', '▁бір', 'ер', '▁бір', 'ер', '▁бір', '▁түрлі', '▁бір', 'дей', '▁', 'ұ', 'л', 'ды', '.', '</s>']
|
227 |
+
2022-08-30 18:07:29,522 - INFO - joeynmt.training - Source: – dedi podshoh, shekilli, mening sayyoramning bir burchagida qari otjalman bor.
|
228 |
+
2022-08-30 18:07:29,522 - INFO - joeynmt.training - Reference: – деді патша, менің планетамның бір бұрышында кәрі атжалман бар сияқты.
|
229 |
+
2022-08-30 18:07:29,523 - INFO - joeynmt.training - Hypothesis: - деді король, жасымның біререр əркігі бір бірер бірер бір түрлі бірдей ұлды.
|
230 |
+
2022-08-30 18:07:29,523 - INFO - joeynmt.training - Example #1
|
231 |
+
2022-08-30 18:07:29,523 - DEBUG - joeynmt.training - Tokenized source: ['▁—', '▁so', 'ʻ', 'radi', '▁u', '.']
|
232 |
+
2022-08-30 18:07:29,523 - DEBUG - joeynmt.training - Tokenized reference: ['▁–', '▁деп', '▁сұрады', '.']
|
233 |
+
2022-08-30 18:07:29,524 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁—', '▁деп', '▁сұрады', '.', '</s>']
|
234 |
+
2022-08-30 18:07:29,525 - INFO - joeynmt.training - Source: — soʻradi u.
|
235 |
+
2022-08-30 18:07:29,525 - INFO - joeynmt.training - Reference: – деп сұрады.
|
236 |
+
2022-08-30 18:07:29,525 - INFO - joeynmt.training - Hypothesis: — деп сұрады.
|
237 |
+
2022-08-30 18:07:29,525 - INFO - joeynmt.training - Example #2
|
238 |
+
2022-08-30 18:07:29,526 - DEBUG - joeynmt.training - Tokenized source: ['▁–', '▁qichqirdi', '▁u', '.']
|
239 |
+
2022-08-30 18:07:29,526 - DEBUG - joeynmt.training - Tokenized reference: ['▁–', '▁деп', '▁айқайлап', '▁жіберді', '.']
|
240 |
+
2022-08-30 18:07:29,526 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁—', '▁деп', '▁айқайлады', '▁ол', '.', '</s>']
|
241 |
+
2022-08-30 18:07:29,527 - INFO - joeynmt.training - Source: – qichqirdi u.
|
242 |
+
2022-08-30 18:07:29,528 - INFO - joeynmt.training - Reference: – деп айқайлап жіберді.
|
243 |
+
2022-08-30 18:07:29,528 - INFO - joeynmt.training - Hypothesis: — деп айқайлады ол.
|
244 |
+
2022-08-30 18:07:29,528 - INFO - joeynmt.training - Example #3
|
245 |
+
2022-08-30 18:07:29,528 - DEBUG - joeynmt.training - Tokenized source: ['▁“', 'Bu', ',', '▁albatta', ',', '▁tartib', '-', 'intizom', '▁masalasi', 'dir', '”', ',', '▁dedi', '▁keyinroq', '▁Kichik', '▁', 'sha', 'h', 'z', 'o', 'da', '.']
|
246 |
+
2022-08-30 18:07:29,529 - DEBUG - joeynmt.training - Tokenized reference: ['▁-', '▁Бұл', '▁ә', 'рине', '▁т', 'ә', 'ртіп', '▁мәселесі', ',', '▁-', '▁деді', '▁маған', '▁Кішкен', 'тай', '▁', 'х', 'ан', 'з', 'а', 'да', '▁кейін', 'ірек', '.']
|
247 |
+
2022-08-30 18:07:29,529 - DEBUG - joeynmt.training - Tokenized hypothesis: ['▁—', '▁Бұл', ',', '▁ə', 'рине', ',', '▁м', 'ə', 'рі', '▁жас', 'ай', 'ға', '▁өте', '▁қатты', '.', '</s>']
|
248 |
+
2022-08-30 18:07:29,530 - INFO - joeynmt.training - Source: “Bu, albatta, tartib-intizom masalasidir”, dedi keyinroq Kichik shahzoda.
|
249 |
+
2022-08-30 18:07:29,530 - INFO - joeynmt.training - Reference: - Бұл әрине тәртіп мәселесі, - деді маған Кішкентай ханзада кейінірек.
|
250 |
+
2022-08-30 18:07:29,530 - INFO - joeynmt.training - Hypothesis: — Бұл, əрине, мəрі жасайға өте қатты.
|
251 |
+
2022-08-30 18:07:48,728 - INFO - joeynmt.training - Epoch 9: total training loss 745.99
|
252 |
+
2022-08-30 18:07:48,728 - INFO - joeynmt.training - EPOCH 10
|
253 |
+
2022-08-30 18:07:52,695 - INFO - joeynmt.training - Epoch 10, Step: 4100, Batch Loss: 2.981759, Batch Acc: 0.019639, Tokens per Sec: 4302, Lr: 0.000140
|
254 |
+
2022-08-30 18:08:15,079 - INFO - joeynmt.training - Epoch 10, Step: 4200, Batch Loss: 3.033593, Batch Acc: 0.004169, Tokens per Sec: 4286, Lr: 0.000138
|
255 |
+
2022-08-30 18:08:38,032 - INFO - joeynmt.training - Epoch 10, Step: 4300, Batch Loss: 3.059270, Batch Acc: 0.003467, Tokens per Sec: 4185, Lr: 0.000136
|
256 |
+
2022-08-30 18:08:40,945 - INFO - joeynmt.training - Epoch 10: total training loss 720.08
|
257 |
+
2022-08-30 18:08:40,945 - INFO - joeynmt.training - Training ended after 10 epochs.
|
258 |
+
2022-08-30 18:08:40,945 - INFO - joeynmt.training - Best validation result (greedy) at step 4000: 1.63 bleu.
|
259 |
+
2022-08-30 18:08:40,975 - INFO - joeynmt.model - Building an encoder-decoder model...
|
260 |
+
2022-08-30 18:08:41,221 - INFO - joeynmt.model - Enc-dec model built.
|
261 |
+
2022-08-30 18:08:41,224 - INFO - joeynmt.model - Total params: 13620224
|
262 |
+
2022-08-30 18:08:41,225 - DEBUG - joeynmt.model - Trainable parameters: ['decoder.layer_norm.bias', 'decoder.layer_norm.weight', 'decoder.layers.0.dec_layer_norm.bias', 'decoder.layers.0.dec_layer_norm.weight', 'decoder.layers.0.feed_forward.layer_norm.bias', 'decoder.layers.0.feed_forward.layer_norm.weight', 'decoder.layers.0.feed_forward.pwff_layer.0.bias', 'decoder.layers.0.feed_forward.pwff_layer.0.weight', 'decoder.layers.0.feed_forward.pwff_layer.3.bias', 'decoder.layers.0.feed_forward.pwff_layer.3.weight', 'decoder.layers.0.src_trg_att.k_layer.bias', 'decoder.layers.0.src_trg_att.k_layer.weight', 'decoder.layers.0.src_trg_att.output_layer.bias', 'decoder.layers.0.src_trg_att.output_layer.weight', 'decoder.layers.0.src_trg_att.q_layer.bias', 'decoder.layers.0.src_trg_att.q_layer.weight', 'decoder.layers.0.src_trg_att.v_layer.bias', 'decoder.layers.0.src_trg_att.v_layer.weight', 'decoder.layers.0.trg_trg_att.k_layer.bias', 'decoder.layers.0.trg_trg_att.k_layer.weight', 'decoder.layers.0.trg_trg_att.output_layer.bias', 'decoder.layers.0.trg_trg_att.output_layer.weight', 'decoder.layers.0.trg_trg_att.q_layer.bias', 'decoder.layers.0.trg_trg_att.q_layer.weight', 'decoder.layers.0.trg_trg_att.v_layer.bias', 'decoder.layers.0.trg_trg_att.v_layer.weight', 'decoder.layers.0.x_layer_norm.bias', 'decoder.layers.0.x_layer_norm.weight', 'decoder.layers.1.dec_layer_norm.bias', 'decoder.layers.1.dec_layer_norm.weight', 'decoder.layers.1.feed_forward.layer_norm.bias', 'decoder.layers.1.feed_forward.layer_norm.weight', 'decoder.layers.1.feed_forward.pwff_layer.0.bias', 'decoder.layers.1.feed_forward.pwff_layer.0.weight', 'decoder.layers.1.feed_forward.pwff_layer.3.bias', 'decoder.layers.1.feed_forward.pwff_layer.3.weight', 'decoder.layers.1.src_trg_att.k_layer.bias', 'decoder.layers.1.src_trg_att.k_layer.weight', 'decoder.layers.1.src_trg_att.output_layer.bias', 'decoder.layers.1.src_trg_att.output_layer.weight', 'decoder.layers.1.src_trg_att.q_layer.bias', 'decoder.layers.1.src_trg_att.q_layer.weight', 'decoder.layers.1.src_trg_att.v_layer.bias', 'decoder.layers.1.src_trg_att.v_layer.weight', 'decoder.layers.1.trg_trg_att.k_layer.bias', 'decoder.layers.1.trg_trg_att.k_layer.weight', 'decoder.layers.1.trg_trg_att.output_layer.bias', 'decoder.layers.1.trg_trg_att.output_layer.weight', 'decoder.layers.1.trg_trg_att.q_layer.bias', 'decoder.layers.1.trg_trg_att.q_layer.weight', 'decoder.layers.1.trg_trg_att.v_layer.bias', 'decoder.layers.1.trg_trg_att.v_layer.weight', 'decoder.layers.1.x_layer_norm.bias', 'decoder.layers.1.x_layer_norm.weight', 'decoder.layers.2.dec_layer_norm.bias', 'decoder.layers.2.dec_layer_norm.weight', 'decoder.layers.2.feed_forward.layer_norm.bias', 'decoder.layers.2.feed_forward.layer_norm.weight', 'decoder.layers.2.feed_forward.pwff_layer.0.bias', 'decoder.layers.2.feed_forward.pwff_layer.0.weight', 'decoder.layers.2.feed_forward.pwff_layer.3.bias', 'decoder.layers.2.feed_forward.pwff_layer.3.weight', 'decoder.layers.2.src_trg_att.k_layer.bias', 'decoder.layers.2.src_trg_att.k_layer.weight', 'decoder.layers.2.src_trg_att.output_layer.bias', 'decoder.layers.2.src_trg_att.output_layer.weight', 'decoder.layers.2.src_trg_att.q_layer.bias', 'decoder.layers.2.src_trg_att.q_layer.weight', 'decoder.layers.2.src_trg_att.v_layer.bias', 'decoder.layers.2.src_trg_att.v_layer.weight', 'decoder.layers.2.trg_trg_att.k_layer.bias', 'decoder.layers.2.trg_trg_att.k_layer.weight', 'decoder.layers.2.trg_trg_att.output_layer.bias', 'decoder.layers.2.trg_trg_att.output_layer.weight', 'decoder.layers.2.trg_trg_att.q_layer.bias', 'decoder.layers.2.trg_trg_att.q_layer.weight', 'decoder.layers.2.trg_trg_att.v_layer.bias', 'decoder.layers.2.trg_trg_att.v_layer.weight', 'decoder.layers.2.x_layer_norm.bias', 'decoder.layers.2.x_layer_norm.weight', 'decoder.layers.3.dec_layer_norm.bias', 'decoder.layers.3.dec_layer_norm.weight', 'decoder.layers.3.feed_forward.layer_norm.bias', 'decoder.layers.3.feed_forward.layer_norm.weight', 'decoder.layers.3.feed_forward.pwff_layer.0.bias', 'decoder.layers.3.feed_forward.pwff_layer.0.weight', 'decoder.layers.3.feed_forward.pwff_layer.3.bias', 'decoder.layers.3.feed_forward.pwff_layer.3.weight', 'decoder.layers.3.src_trg_att.k_layer.bias', 'decoder.layers.3.src_trg_att.k_layer.weight', 'decoder.layers.3.src_trg_att.output_layer.bias', 'decoder.layers.3.src_trg_att.output_layer.weight', 'decoder.layers.3.src_trg_att.q_layer.bias', 'decoder.layers.3.src_trg_att.q_layer.weight', 'decoder.layers.3.src_trg_att.v_layer.bias', 'decoder.layers.3.src_trg_att.v_layer.weight', 'decoder.layers.3.trg_trg_att.k_layer.bias', 'decoder.layers.3.trg_trg_att.k_layer.weight', 'decoder.layers.3.trg_trg_att.output_layer.bias', 'decoder.layers.3.trg_trg_att.output_layer.weight', 'decoder.layers.3.trg_trg_att.q_layer.bias', 'decoder.layers.3.trg_trg_att.q_layer.weight', 'decoder.layers.3.trg_trg_att.v_layer.bias', 'decoder.layers.3.trg_trg_att.v_layer.weight', 'decoder.layers.3.x_layer_norm.bias', 'decoder.layers.3.x_layer_norm.weight', 'decoder.layers.4.dec_layer_norm.bias', 'decoder.layers.4.dec_layer_norm.weight', 'decoder.layers.4.feed_forward.layer_norm.bias', 'decoder.layers.4.feed_forward.layer_norm.weight', 'decoder.layers.4.feed_forward.pwff_layer.0.bias', 'decoder.layers.4.feed_forward.pwff_layer.0.weight', 'decoder.layers.4.feed_forward.pwff_layer.3.bias', 'decoder.layers.4.feed_forward.pwff_layer.3.weight', 'decoder.layers.4.src_trg_att.k_layer.bias', 'decoder.layers.4.src_trg_att.k_layer.weight', 'decoder.layers.4.src_trg_att.output_layer.bias', 'decoder.layers.4.src_trg_att.output_layer.weight', 'decoder.layers.4.src_trg_att.q_layer.bias', 'decoder.layers.4.src_trg_att.q_layer.weight', 'decoder.layers.4.src_trg_att.v_layer.bias', 'decoder.layers.4.src_trg_att.v_layer.weight', 'decoder.layers.4.trg_trg_att.k_layer.bias', 'decoder.layers.4.trg_trg_att.k_layer.weight', 'decoder.layers.4.trg_trg_att.output_layer.bias', 'decoder.layers.4.trg_trg_att.output_layer.weight', 'decoder.layers.4.trg_trg_att.q_layer.bias', 'decoder.layers.4.trg_trg_att.q_layer.weight', 'decoder.layers.4.trg_trg_att.v_layer.bias', 'decoder.layers.4.trg_trg_att.v_layer.weight', 'decoder.layers.4.x_layer_norm.bias', 'decoder.layers.4.x_layer_norm.weight', 'decoder.layers.5.dec_layer_norm.bias', 'decoder.layers.5.dec_layer_norm.weight', 'decoder.layers.5.feed_forward.layer_norm.bias', 'decoder.layers.5.feed_forward.layer_norm.weight', 'decoder.layers.5.feed_forward.pwff_layer.0.bias', 'decoder.layers.5.feed_forward.pwff_layer.0.weight', 'decoder.layers.5.feed_forward.pwff_layer.3.bias', 'decoder.layers.5.feed_forward.pwff_layer.3.weight', 'decoder.layers.5.src_trg_att.k_layer.bias', 'decoder.layers.5.src_trg_att.k_layer.weight', 'decoder.layers.5.src_trg_att.output_layer.bias', 'decoder.layers.5.src_trg_att.output_layer.weight', 'decoder.layers.5.src_trg_att.q_layer.bias', 'decoder.layers.5.src_trg_att.q_layer.weight', 'decoder.layers.5.src_trg_att.v_layer.bias', 'decoder.layers.5.src_trg_att.v_layer.weight', 'decoder.layers.5.trg_trg_att.k_layer.bias', 'decoder.layers.5.trg_trg_att.k_layer.weight', 'decoder.layers.5.trg_trg_att.output_layer.bias', 'decoder.layers.5.trg_trg_att.output_layer.weight', 'decoder.layers.5.trg_trg_att.q_layer.bias', 'decoder.layers.5.trg_trg_att.q_layer.weight', 'decoder.layers.5.trg_trg_att.v_layer.bias', 'decoder.layers.5.trg_trg_att.v_layer.weight', 'decoder.layers.5.x_layer_norm.bias', 'decoder.layers.5.x_layer_norm.weight', 'encoder.layer_norm.bias', 'encoder.layer_norm.weight', 'encoder.layers.0.feed_forward.layer_norm.bias', 'encoder.layers.0.feed_forward.layer_norm.weight', 'encoder.layers.0.feed_forward.pwff_layer.0.bias', 'encoder.layers.0.feed_forward.pwff_layer.0.weight', 'encoder.layers.0.feed_forward.pwff_layer.3.bias', 'encoder.layers.0.feed_forward.pwff_layer.3.weight', 'encoder.layers.0.layer_norm.bias', 'encoder.layers.0.layer_norm.weight', 'encoder.layers.0.src_src_att.k_layer.bias', 'encoder.layers.0.src_src_att.k_layer.weight', 'encoder.layers.0.src_src_att.output_layer.bias', 'encoder.layers.0.src_src_att.output_layer.weight', 'encoder.layers.0.src_src_att.q_layer.bias', 'encoder.layers.0.src_src_att.q_layer.weight', 'encoder.layers.0.src_src_att.v_layer.bias', 'encoder.layers.0.src_src_att.v_layer.weight', 'encoder.layers.1.feed_forward.layer_norm.bias', 'encoder.layers.1.feed_forward.layer_norm.weight', 'encoder.layers.1.feed_forward.pwff_layer.0.bias', 'encoder.layers.1.feed_forward.pwff_layer.0.weight', 'encoder.layers.1.feed_forward.pwff_layer.3.bias', 'encoder.layers.1.feed_forward.pwff_layer.3.weight', 'encoder.layers.1.layer_norm.bias', 'encoder.layers.1.layer_norm.weight', 'encoder.layers.1.src_src_att.k_layer.bias', 'encoder.layers.1.src_src_att.k_layer.weight', 'encoder.layers.1.src_src_att.output_layer.bias', 'encoder.layers.1.src_src_att.output_layer.weight', 'encoder.layers.1.src_src_att.q_layer.bias', 'encoder.layers.1.src_src_att.q_layer.weight', 'encoder.layers.1.src_src_att.v_layer.bias', 'encoder.layers.1.src_src_att.v_layer.weight', 'encoder.layers.2.feed_forward.layer_norm.bias', 'encoder.layers.2.feed_forward.layer_norm.weight', 'encoder.layers.2.feed_forward.pwff_layer.0.bias', 'encoder.layers.2.feed_forward.pwff_layer.0.weight', 'encoder.layers.2.feed_forward.pwff_layer.3.bias', 'encoder.layers.2.feed_forward.pwff_layer.3.weight', 'encoder.layers.2.layer_norm.bias', 'encoder.layers.2.layer_norm.weight', 'encoder.layers.2.src_src_att.k_layer.bias', 'encoder.layers.2.src_src_att.k_layer.weight', 'encoder.layers.2.src_src_att.output_layer.bias', 'encoder.layers.2.src_src_att.output_layer.weight', 'encoder.layers.2.src_src_att.q_layer.bias', 'encoder.layers.2.src_src_att.q_layer.weight', 'encoder.layers.2.src_src_att.v_layer.bias', 'encoder.layers.2.src_src_att.v_layer.weight', 'encoder.layers.3.feed_forward.layer_norm.bias', 'encoder.layers.3.feed_forward.layer_norm.weight', 'encoder.layers.3.feed_forward.pwff_layer.0.bias', 'encoder.layers.3.feed_forward.pwff_layer.0.weight', 'encoder.layers.3.feed_forward.pwff_layer.3.bias', 'encoder.layers.3.feed_forward.pwff_layer.3.weight', 'encoder.layers.3.layer_norm.bias', 'encoder.layers.3.layer_norm.weight', 'encoder.layers.3.src_src_att.k_layer.bias', 'encoder.layers.3.src_src_att.k_layer.weight', 'encoder.layers.3.src_src_att.output_layer.bias', 'encoder.layers.3.src_src_att.output_layer.weight', 'encoder.layers.3.src_src_att.q_layer.bias', 'encoder.layers.3.src_src_att.q_layer.weight', 'encoder.layers.3.src_src_att.v_layer.bias', 'encoder.layers.3.src_src_att.v_layer.weight', 'encoder.layers.4.feed_forward.layer_norm.bias', 'encoder.layers.4.feed_forward.layer_norm.weight', 'encoder.layers.4.feed_forward.pwff_layer.0.bias', 'encoder.layers.4.feed_forward.pwff_layer.0.weight', 'encoder.layers.4.feed_forward.pwff_layer.3.bias', 'encoder.layers.4.feed_forward.pwff_layer.3.weight', 'encoder.layers.4.layer_norm.bias', 'encoder.layers.4.layer_norm.weight', 'encoder.layers.4.src_src_att.k_layer.bias', 'encoder.layers.4.src_src_att.k_layer.weight', 'encoder.layers.4.src_src_att.output_layer.bias', 'encoder.layers.4.src_src_att.output_layer.weight', 'encoder.layers.4.src_src_att.q_layer.bias', 'encoder.layers.4.src_src_att.q_layer.weight', 'encoder.layers.4.src_src_att.v_layer.bias', 'encoder.layers.4.src_src_att.v_layer.weight', 'encoder.layers.5.feed_forward.layer_norm.bias', 'encoder.layers.5.feed_forward.layer_norm.weight', 'encoder.layers.5.feed_forward.pwff_layer.0.bias', 'encoder.layers.5.feed_forward.pwff_layer.0.weight', 'encoder.layers.5.feed_forward.pwff_layer.3.bias', 'encoder.layers.5.feed_forward.pwff_layer.3.weight', 'encoder.layers.5.layer_norm.bias', 'encoder.layers.5.layer_norm.weight', 'encoder.layers.5.src_src_att.k_layer.bias', 'encoder.layers.5.src_src_att.k_layer.weight', 'encoder.layers.5.src_src_att.output_layer.bias', 'encoder.layers.5.src_src_att.output_layer.weight', 'encoder.layers.5.src_src_att.q_layer.bias', 'encoder.layers.5.src_src_att.q_layer.weight', 'encoder.layers.5.src_src_att.v_layer.bias', 'encoder.layers.5.src_src_att.v_layer.weight', 'src_embed.lut.weight']
|
263 |
+
2022-08-30 18:08:41,647 - INFO - joeynmt.helpers - Load model from /content/drive/MyDrive/models/uzbek_kazakh_resume/4000.ckpt.
|
264 |
+
2022-08-30 18:08:42,069 - INFO - joeynmt.prediction - Decoding on dev set...
|
265 |
+
2022-08-30 18:08:42,069 - INFO - joeynmt.prediction - Predicting 1000 example(s)... (Beam search with beam_size=5, beam_alpha=1.0, n_best=1, min_output_length=1, max_output_length=100, return_prob='none', generate_unk=True, repetition_penalty=-1, no_repeat_ngram_size=-1)
|
266 |
+
2022-08-30 18:09:26,852 - INFO - joeynmt.metrics - nrefs:1|case:mixed|eff:no|tok:13a|smooth:exp|version:2.2.0
|
267 |
+
2022-08-30 18:09:26,853 - INFO - joeynmt.prediction - Evaluation result (beam search) bleu: 2.57, generation: 44.6640[sec], evaluation: 0.1006[sec]
|
268 |
+
2022-08-30 18:09:26,858 - INFO - joeynmt.prediction - Translations saved to: /content/drive/MyDrive/models/uzbek_kazakh_resume/00004000.hyps.dev.
|
269 |
+
2022-08-30 18:09:26,979 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/test/cache-33919be4354fb89b.arrow
|
270 |
+
2022-08-30 18:09:27,095 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /content/drive/MyDrive/uzbek_kazakh/test/cache-6ccb156ae45ce241.arrow
|
271 |
+
2022-08-30 18:09:27,098 - INFO - joeynmt.prediction - Decoding on test set...
|
272 |
+
2022-08-30 18:09:27,098 - INFO - joeynmt.prediction - Predicting 1000 example(s)... (Beam search with beam_size=5, beam_alpha=1.0, n_best=1, min_output_length=1, max_output_length=100, return_prob='none', generate_unk=True, repetition_penalty=-1, no_repeat_ngram_size=-1)
|
273 |
+
2022-08-30 18:10:20,714 - INFO - joeynmt.metrics - nrefs:1|case:mixed|eff:no|tok:13a|smooth:exp|version:2.2.0
|
274 |
+
2022-08-30 18:10:20,714 - INFO - joeynmt.prediction - Evaluation result (beam search) bleu: 3.99, generation: 53.4864[sec], evaluation: 0.1099[sec]
|
275 |
+
2022-08-30 18:10:20,720 - INFO - joeynmt.prediction - Translations saved to: /content/drive/MyDrive/models/uzbek_kazakh_resume/00004000.hyps.test.
|