diff --git "a/nncf_output.log" "b/nncf_output.log" new file mode 100644--- /dev/null +++ "b/nncf_output.log" @@ -0,0 +1,18849 @@ +INFO:nncf:Ignored adding weight quantizer for: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight quantizer for: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight quantizer for: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0 +INFO:nncf:Not adding activation input quantizer for operation: 4 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 +INFO:nncf:Not adding activation input quantizer for operation: 5 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 +INFO:nncf:Not adding activation input quantizer for operation: 6 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 8 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/__iadd___0 +INFO:nncf:Not adding activation input quantizer for operation: 9 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 10 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/Dropout[dropout]/dropout_0 +INFO:nncf:Not adding activation input quantizer for operation: 23 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 26 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 32 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 33 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 38 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 39 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 52 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 55 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 61 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 62 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 67 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 68 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 81 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 84 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 90 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 91 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 96 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 97 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 110 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 113 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 119 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 120 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 125 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 126 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 139 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 142 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 148 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 149 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 154 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 155 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 168 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 171 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 177 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 178 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 183 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 184 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 197 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 200 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 206 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 207 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 212 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 213 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 226 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 229 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 235 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 236 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 241 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 242 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 255 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 258 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 264 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 265 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 270 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 271 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 284 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 287 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 293 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 294 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 299 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 300 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 313 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 316 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 322 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 323 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 328 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 329 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 342 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 345 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 351 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 352 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 357 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 358 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Collecting tensor statistics |█ | 1 / 10 +INFO:nncf:Collecting tensor statistics |███ | 2 / 10 +INFO:nncf:Collecting tensor statistics |████ | 3 / 10 +INFO:nncf:Collecting tensor statistics |██████ | 4 / 10 +INFO:nncf:Collecting tensor statistics |████████ | 5 / 10 +INFO:nncf:Collecting tensor statistics |█████████ | 6 / 10 +INFO:nncf:Collecting tensor statistics |███████████ | 7 / 10 +INFO:nncf:Collecting tensor statistics |████████████ | 8 / 10 +INFO:nncf:Collecting tensor statistics |██████████████ | 9 / 10 +INFO:nncf:Collecting tensor statistics |████████████████| 10 / 10 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertPooler[pooler]/NNCFLinear[dense]/linear_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/NNCFLinear[classifier]/linear_0 +INFO:nncf:Compiling and loading torch extension: quantized_functions_cuda... +INFO:nncf:Finished loading torch extension: quantized_functions_cuda +INFO:nncf:Statistics of the quantization algorithm: +Epoch 0 |+--------------------------------+-------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+================================+=======+ +Epoch 0 || Ratio of enabled quantizations | 100 | +Epoch 0 |+--------------------------------+-------+ +Epoch 0 | +Epoch 0 |Statistics of the quantization share: +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+==================================+====================+ +Epoch 0 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 | +Epoch 0 |Statistics of the bitwidth distribution: +Epoch 0 |+--------------+---------------------+--------------------+--------------------+ +Epoch 0 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 0 || | WQs | Placed AQs | Qs | +Epoch 0 |+==============+=====================+====================+====================+ +Epoch 0 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 0 || | | | 173) | +Epoch 0 |+--------------+---------------------+--------------------+--------------------+ +Epoch 0 | +Epoch 0 |Statistics of the sparsified model: +Epoch 0 |+-----------------------------------------+-------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+=========================================+=======+ +Epoch 0 || Sparsity level of the whole model | 0.000 | +Epoch 0 |+-----------------------------------------+-------+ +Epoch 0 || Sparsity level of all sparsified layers | 0.000 | +Epoch 0 |+-----------------------------------------+-------+ +Epoch 0 | +Epoch 0 |Statistics by sparsified layers: +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 0 |+======================+================+================+=====================+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0.000 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[q | | | | +Epoch 0 || uery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[k | | | | +Epoch 0 || ey]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[v | | | | +Epoch 0 || alue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfOutput[o | | | | +Epoch 0 || utput]/NNCFLinear[de | | | | +Epoch 0 || nse]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertIntermediate[inte | | | | +Epoch 0 || rmediate]/NNCFLinear | | | | +Epoch 0 || [dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertOutput[output]/NN | | | | +Epoch 0 || CFLinear[dense]/line | | | | +Epoch 0 || ar_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[q | | | | +Epoch 0 || uery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[k | | | | +Epoch 0 || ey]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[v | | | | +Epoch 0 || alue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfOutput[o | | | | +Epoch 0 || utput]/NNCFLinear[de | | | | +Epoch 0 || nse]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertIntermediate[inte | | | | +Epoch 0 || rmediate]/NNCFLinear | | | | +Epoch 0 || [dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertOutput[output]/NN | | | | +Epoch 0 || CFLinear[dense]/line | | | | +Epoch 0 || ar_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 | +Epoch 0 |Statistics of the magnitude sparsity algorithm: +Epoch 0 |+----------------------------------------------------------------------+-------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+======================================================================+=======+ +Epoch 0 || A target level of the sparsity for the algorithm for the current | 0 | +Epoch 0 || epoch | | +Epoch 0 |+----------------------------------------------------------------------+-------+ +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || Layer's name | Sparsity threshold | +Epoch 0 |+=========================================================+====================+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 0 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 0 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 0 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 0 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 1 |+--------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+================================+=======+ +Epoch 1 || Ratio of enabled quantizations | 100 | +Epoch 1 |+--------------------------------+-------+ +Epoch 1 | +Epoch 1 |Statistics of the quantization share: +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+==================================+====================+ +Epoch 1 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 | +Epoch 1 |Statistics of the bitwidth distribution: +Epoch 1 |+--------------+---------------------+--------------------+--------------------+ +Epoch 1 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 1 || | WQs | Placed AQs | Qs | +Epoch 1 |+==============+=====================+====================+====================+ +Epoch 1 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 1 || | | | 173) | +Epoch 1 |+--------------+---------------------+--------------------+--------------------+ +Epoch 1 | +Epoch 1 |Statistics of the sparsified model: +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+=========================================+=======+ +Epoch 1 || Sparsity level of the whole model | 0.185 | +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 || Sparsity level of all sparsified layers | 0.238 | +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 | +Epoch 1 |Statistics by sparsified layers: +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 1 |+======================+================+================+=====================+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.149 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.151 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.156 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.160 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.278 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.287 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.146 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.148 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.160 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.279 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.288 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.154 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.156 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.160 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.280 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.289 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.146 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.148 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.155 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.158 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.281 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.292 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.146 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.146 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.152 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.155 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.282 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.295 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.145 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.145 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.152 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.154 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.281 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.294 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.144 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.144 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.155 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.280 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.292 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.143 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.144 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.150 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.151 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.278 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.287 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.143 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.144 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.147 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.147 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.275 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.283 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.141 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.143 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.145 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.144 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.278 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.285 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.141 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.142 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.152 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.147 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.274 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.279 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.142 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.141 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.146 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.141 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.272 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.275 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 | +Epoch 1 |Statistics of the magnitude sparsity algorithm: +Epoch 1 |+----------------------------------------------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+======================================================================+=======+ +Epoch 1 || A target level of the sparsity for the algorithm for the current | 0.238 | +Epoch 1 || epoch | | +Epoch 1 |+----------------------------------------------------------------------+-------+ +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || Layer's name | Sparsity threshold | +Epoch 1 |+=========================================================+====================+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 1 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 1 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 1 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 1 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 2 |+--------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+================================+=======+ +Epoch 2 || Ratio of enabled quantizations | 100 | +Epoch 2 |+--------------------------------+-------+ +Epoch 2 | +Epoch 2 |Statistics of the quantization share: +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+==================================+====================+ +Epoch 2 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 | +Epoch 2 |Statistics of the bitwidth distribution: +Epoch 2 |+--------------+---------------------+--------------------+--------------------+ +Epoch 2 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 2 || | WQs | Placed AQs | Qs | +Epoch 2 |+==============+=====================+====================+====================+ +Epoch 2 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 2 || | | | 173) | +Epoch 2 |+--------------+---------------------+--------------------+--------------------+ +Epoch 2 | +Epoch 2 |Statistics of the sparsified model: +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+=========================================+=======+ +Epoch 2 || Sparsity level of the whole model | 0.329 | +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 || Sparsity level of all sparsified layers | 0.424 | +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 | +Epoch 2 |Statistics by sparsified layers: +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 2 |+======================+================+================+=====================+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.271 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.276 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.287 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.494 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.506 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.267 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.270 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.274 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.286 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.495 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.509 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.280 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.280 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.279 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.286 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.497 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.509 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.266 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.269 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.277 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.281 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.497 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.514 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.264 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.264 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.271 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.499 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.518 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.264 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.265 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.276 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.498 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.517 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.262 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.263 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.275 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.497 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.513 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.263 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.262 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.271 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.273 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.493 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.506 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.262 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.262 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.267 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.268 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.490 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.501 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.259 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.261 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.264 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.264 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.494 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.503 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.260 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.261 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.274 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.268 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.487 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.495 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.261 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.259 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.264 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.259 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.487 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.490 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 | +Epoch 2 |Statistics of the magnitude sparsity algorithm: +Epoch 2 |+----------------------------------------------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+======================================================================+=======+ +Epoch 2 || A target level of the sparsity for the algorithm for the current | 0.424 | +Epoch 2 || epoch | | +Epoch 2 |+----------------------------------------------------------------------+-------+ +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || Layer's name | Sparsity threshold | +Epoch 2 |+=========================================================+====================+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 2 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 2 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 2 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 2 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 3 |+--------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+================================+=======+ +Epoch 3 || Ratio of enabled quantizations | 100 | +Epoch 3 |+--------------------------------+-------+ +Epoch 3 | +Epoch 3 |Statistics of the quantization share: +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+==================================+====================+ +Epoch 3 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 | +Epoch 3 |Statistics of the bitwidth distribution: +Epoch 3 |+--------------+---------------------+--------------------+--------------------+ +Epoch 3 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 3 || | WQs | Placed AQs | Qs | +Epoch 3 |+==============+=====================+====================+====================+ +Epoch 3 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 3 || | | | 173) | +Epoch 3 |+--------------+---------------------+--------------------+--------------------+ +Epoch 3 | +Epoch 3 |Statistics of the sparsified model: +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+=========================================+=======+ +Epoch 3 || Sparsity level of the whole model | 0.437 | +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 || Sparsity level of all sparsified layers | 0.563 | +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 | +Epoch 3 |Statistics by sparsified layers: +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 3 |+======================+================+================+=====================+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.374 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.380 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.377 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.391 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.651 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.664 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.369 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.373 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.375 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.391 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.653 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.667 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.386 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.386 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.380 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.388 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.655 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.667 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.367 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.369 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.378 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.383 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.655 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.672 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.365 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.365 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.372 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.381 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.656 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.675 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.364 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.365 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.377 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.380 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.656 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.674 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.362 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.364 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.377 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.380 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.655 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.670 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.363 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.363 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.373 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.376 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.651 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.664 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.362 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.363 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.368 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.371 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.649 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.660 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.360 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.362 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.366 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.366 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.653 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.663 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.361 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.362 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.377 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.371 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.647 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.655 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.363 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.360 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.367 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.361 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.645 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.649 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 | +Epoch 3 |Statistics of the magnitude sparsity algorithm: +Epoch 3 |+----------------------------------------------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+======================================================================+=======+ +Epoch 3 || A target level of the sparsity for the algorithm for the current | 0.563 | +Epoch 3 || epoch | | +Epoch 3 |+----------------------------------------------------------------------+-------+ +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || Layer's name | Sparsity threshold | +Epoch 3 |+=========================================================+====================+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 3 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 3 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 3 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 3 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 4 |+--------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+================================+=======+ +Epoch 4 || Ratio of enabled quantizations | 100 | +Epoch 4 |+--------------------------------+-------+ +Epoch 4 | +Epoch 4 |Statistics of the quantization share: +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+==================================+====================+ +Epoch 4 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 | +Epoch 4 |Statistics of the bitwidth distribution: +Epoch 4 |+--------------+---------------------+--------------------+--------------------+ +Epoch 4 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 4 || | WQs | Placed AQs | Qs | +Epoch 4 |+==============+=====================+====================+====================+ +Epoch 4 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 4 || | | | 173) | +Epoch 4 |+--------------+---------------------+--------------------+--------------------+ +Epoch 4 | +Epoch 4 |Statistics of the sparsified model: +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+=========================================+=======+ +Epoch 4 || Sparsity level of the whole model | 0.514 | +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 || Sparsity level of all sparsified layers | 0.663 | +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 | +Epoch 4 |Statistics by sparsified layers: +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 4 |+======================+================+================+=====================+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.458 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.466 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.460 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.475 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.760 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.771 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.454 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.457 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.458 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.477 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.761 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.774 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.473 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.473 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.462 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.472 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.763 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.773 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.451 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.453 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.461 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.467 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.763 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.777 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.449 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.449 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.456 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.465 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.764 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.779 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.448 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.450 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.461 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.465 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.764 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.778 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.446 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.447 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.461 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.465 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.763 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.776 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.447 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.447 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.456 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.460 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.760 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.771 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.446 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.447 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.450 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.454 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.758 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.768 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.443 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.445 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.448 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.448 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.763 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.771 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.445 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.446 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.461 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.455 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.758 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.765 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.448 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.444 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.450 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.445 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.756 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.759 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 | +Epoch 4 |Statistics of the magnitude sparsity algorithm: +Epoch 4 |+----------------------------------------------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+======================================================================+=======+ +Epoch 4 || A target level of the sparsity for the algorithm for the current | 0.663 | +Epoch 4 || epoch | | +Epoch 4 |+----------------------------------------------------------------------+-------+ +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || Layer's name | Sparsity threshold | +Epoch 4 |+=========================================================+====================+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 4 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 4 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 4 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 4 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 5 |+--------------------------------+-------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+================================+=======+ +Epoch 5 || Ratio of enabled quantizations | 100 | +Epoch 5 |+--------------------------------+-------+ +Epoch 5 | +Epoch 5 |Statistics of the quantization share: +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+==================================+====================+ +Epoch 5 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 | +Epoch 5 |Statistics of the bitwidth distribution: +Epoch 5 |+--------------+---------------------+--------------------+--------------------+ +Epoch 5 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 5 || | WQs | Placed AQs | Qs | +Epoch 5 |+==============+=====================+====================+====================+ +Epoch 5 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 5 || | | | 173) | +Epoch 5 |+--------------+---------------------+--------------------+--------------------+ +Epoch 5 | +Epoch 5 |Statistics of the sparsified model: +Epoch 5 |+-----------------------------------------+-------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+=========================================+=======+ +Epoch 5 || Sparsity level of the whole model | 0.566 | +Epoch 5 |+-----------------------------------------+-------+ +Epoch 5 || Sparsity level of all sparsified layers | 0.730 | +Epoch 5 |+-----------------------------------------+-------+ +Epoch 5 | +Epoch 5 |Statistics by sparsified layers: +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 5 |+======================+================+================+=====================+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.523 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.531 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.522 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.540 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.829 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.838 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.519 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.523 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.522 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.542 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.830 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.840 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.539 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.539 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.525 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.536 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.831 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.839 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.516 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.518 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.525 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.531 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.832 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.843 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.520 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.530 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.832 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.843 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.525 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.530 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.832 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.843 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.511 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.524 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.530 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.831 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.840 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.511 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.519 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.525 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.829 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.837 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.511 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.518 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.828 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.836 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.508 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.510 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.833 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.840 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.510 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[q | | | | +Epoch 5 || uery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.510 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[k | | | | +Epoch 5 || ey]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.526 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[v | | | | +Epoch 5 || alue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.519 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfOutput[o | | | | +Epoch 5 || utput]/NNCFLinear[de | | | | +Epoch 5 || nse]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.829 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertIntermediate[inte | | | | +Epoch 5 || rmediate]/NNCFLinear | | | | +Epoch 5 || [dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.834 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertOutput[output]/NN | | | | +Epoch 5 || CFLinear[dense]/line | | | | +Epoch 5 || ar_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[q | | | | +Epoch 5 || uery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.509 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[k | | | | +Epoch 5 || ey]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.516 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[v | | | | +Epoch 5 || alue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.510 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfOutput[o | | | | +Epoch 5 || utput]/NNCFLinear[de | | | | +Epoch 5 || nse]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.827 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertIntermediate[inte | | | | +Epoch 5 || rmediate]/NNCFLinear | | | | +Epoch 5 || [dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.830 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertOutput[output]/NN | | | | +Epoch 5 || CFLinear[dense]/line | | | | +Epoch 5 || ar_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 | +Epoch 5 |Statistics of the magnitude sparsity algorithm: +Epoch 5 |+----------------------------------------------------------------------+-------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+======================================================================+=======+ +Epoch 5 || A target level of the sparsity for the algorithm for the current | 0.730 | +Epoch 5 || epoch | | +Epoch 5 |+----------------------------------------------------------------------+-------+ +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || Layer's name | Sparsity threshold | +Epoch 5 |+=========================================================+====================+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 5 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 5 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 5 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 5 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 6 |+--------------------------------+-------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+================================+=======+ +Epoch 6 || Ratio of enabled quantizations | 100 | +Epoch 6 |+--------------------------------+-------+ +Epoch 6 | +Epoch 6 |Statistics of the quantization share: +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+==================================+====================+ +Epoch 6 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 | +Epoch 6 |Statistics of the bitwidth distribution: +Epoch 6 |+--------------+---------------------+--------------------+--------------------+ +Epoch 6 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 6 || | WQs | Placed AQs | Qs | +Epoch 6 |+==============+=====================+====================+====================+ +Epoch 6 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 6 || | | | 173) | +Epoch 6 |+--------------+---------------------+--------------------+--------------------+ +Epoch 6 | +Epoch 6 |Statistics of the sparsified model: +Epoch 6 |+-----------------------------------------+-------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+=========================================+=======+ +Epoch 6 || Sparsity level of the whole model | 0.598 | +Epoch 6 |+-----------------------------------------+-------+ +Epoch 6 || Sparsity level of all sparsified layers | 0.770 | +Epoch 6 |+-----------------------------------------+-------+ +Epoch 6 | +Epoch 6 |Statistics by sparsified layers: +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 6 |+======================+================+================+=====================+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.576 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.566 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.585 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.869 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.876 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.586 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.870 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.878 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.579 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.871 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.877 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.561 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.563 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.575 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.871 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.880 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.871 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.879 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.557 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.559 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.871 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.879 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.556 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.870 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.877 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.556 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.563 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.869 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.875 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.556 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.556 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.559 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.563 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.869 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.874 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.552 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.873 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.878 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[q | | | | +Epoch 6 || uery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[k | | | | +Epoch 6 || ey]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[v | | | | +Epoch 6 || alue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfOutput[o | | | | +Epoch 6 || utput]/NNCFLinear[de | | | | +Epoch 6 || nse]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.870 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertIntermediate[inte | | | | +Epoch 6 || rmediate]/NNCFLinear | | | | +Epoch 6 || [dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.874 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertOutput[output]/NN | | | | +Epoch 6 || CFLinear[dense]/line | | | | +Epoch 6 || ar_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[q | | | | +Epoch 6 || uery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.554 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[k | | | | +Epoch 6 || ey]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[v | | | | +Epoch 6 || alue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfOutput[o | | | | +Epoch 6 || utput]/NNCFLinear[de | | | | +Epoch 6 || nse]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.868 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertIntermediate[inte | | | | +Epoch 6 || rmediate]/NNCFLinear | | | | +Epoch 6 || [dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.871 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertOutput[output]/NN | | | | +Epoch 6 || CFLinear[dense]/line | | | | +Epoch 6 || ar_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 | +Epoch 6 |Statistics of the magnitude sparsity algorithm: +Epoch 6 |+----------------------------------------------------------------------+-------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+======================================================================+=======+ +Epoch 6 || A target level of the sparsity for the algorithm for the current | 0.770 | +Epoch 6 || epoch | | +Epoch 6 |+----------------------------------------------------------------------+-------+ +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || Layer's name | Sparsity threshold | +Epoch 6 |+=========================================================+====================+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 6 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 6 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 6 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 6 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 7 |+--------------------------------+-------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+================================+=======+ +Epoch 7 || Ratio of enabled quantizations | 100 | +Epoch 7 |+--------------------------------+-------+ +Epoch 7 | +Epoch 7 |Statistics of the quantization share: +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+==================================+====================+ +Epoch 7 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 | +Epoch 7 |Statistics of the bitwidth distribution: +Epoch 7 |+--------------+---------------------+--------------------+--------------------+ +Epoch 7 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 7 || | WQs | Placed AQs | Qs | +Epoch 7 |+==============+=====================+====================+====================+ +Epoch 7 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 7 || | | | 173) | +Epoch 7 |+--------------+---------------------+--------------------+--------------------+ +Epoch 7 | +Epoch 7 |Statistics of the sparsified model: +Epoch 7 |+-----------------------------------------+-------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+=========================================+=======+ +Epoch 7 || Sparsity level of the whole model | 0.614 | +Epoch 7 |+-----------------------------------------+-------+ +Epoch 7 || Sparsity level of all sparsified layers | 0.791 | +Epoch 7 |+-----------------------------------------+-------+ +Epoch 7 | +Epoch 7 |Statistics by sparsified layers: +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 7 |+======================+================+================+=====================+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.888 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.586 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.581 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.581 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.580 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.581 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.893 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.581 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.581 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.893 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.577 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.580 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.580 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[q | | | | +Epoch 7 || uery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.580 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[k | | | | +Epoch 7 || ey]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[v | | | | +Epoch 7 || alue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfOutput[o | | | | +Epoch 7 || utput]/NNCFLinear[de | | | | +Epoch 7 || nse]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertIntermediate[inte | | | | +Epoch 7 || rmediate]/NNCFLinear | | | | +Epoch 7 || [dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertOutput[output]/NN | | | | +Epoch 7 || CFLinear[dense]/line | | | | +Epoch 7 || ar_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[q | | | | +Epoch 7 || uery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.579 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[k | | | | +Epoch 7 || ey]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[v | | | | +Epoch 7 || alue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.580 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfOutput[o | | | | +Epoch 7 || utput]/NNCFLinear[de | | | | +Epoch 7 || nse]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.888 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertIntermediate[inte | | | | +Epoch 7 || rmediate]/NNCFLinear | | | | +Epoch 7 || [dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertOutput[output]/NN | | | | +Epoch 7 || CFLinear[dense]/line | | | | +Epoch 7 || ar_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 | +Epoch 7 |Statistics of the magnitude sparsity algorithm: +Epoch 7 |+----------------------------------------------------------------------+-------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+======================================================================+=======+ +Epoch 7 || A target level of the sparsity for the algorithm for the current | 0.791 | +Epoch 7 || epoch | | +Epoch 7 |+----------------------------------------------------------------------+-------+ +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || Layer's name | Sparsity threshold | +Epoch 7 |+=========================================================+====================+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 7 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 7 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 7 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 7 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 8 |+--------------------------------+-------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+================================+=======+ +Epoch 8 || Ratio of enabled quantizations | 100 | +Epoch 8 |+--------------------------------+-------+ +Epoch 8 | +Epoch 8 |Statistics of the quantization share: +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+==================================+====================+ +Epoch 8 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 | +Epoch 8 |Statistics of the bitwidth distribution: +Epoch 8 |+--------------+---------------------+--------------------+--------------------+ +Epoch 8 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 8 || | WQs | Placed AQs | Qs | +Epoch 8 |+==============+=====================+====================+====================+ +Epoch 8 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 8 || | | | 173) | +Epoch 8 |+--------------+---------------------+--------------------+--------------------+ +Epoch 8 | +Epoch 8 |Statistics of the sparsified model: +Epoch 8 |+-----------------------------------------+-------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+=========================================+=======+ +Epoch 8 || Sparsity level of the whole model | 0.620 | +Epoch 8 |+-----------------------------------------+-------+ +Epoch 8 || Sparsity level of all sparsified layers | 0.799 | +Epoch 8 |+-----------------------------------------+-------+ +Epoch 8 | +Epoch 8 |Statistics by sparsified layers: +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 8 |+======================+================+================+=====================+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.619 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.895 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.901 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.901 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.608 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.608 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.608 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.901 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.587 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.899 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[q | | | | +Epoch 8 || uery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[k | | | | +Epoch 8 || ey]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[v | | | | +Epoch 8 || alue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfOutput[o | | | | +Epoch 8 || utput]/NNCFLinear[de | | | | +Epoch 8 || nse]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertIntermediate[inte | | | | +Epoch 8 || rmediate]/NNCFLinear | | | | +Epoch 8 || [dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.901 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertOutput[output]/NN | | | | +Epoch 8 || CFLinear[dense]/line | | | | +Epoch 8 || ar_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[q | | | | +Epoch 8 || uery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[k | | | | +Epoch 8 || ey]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[v | | | | +Epoch 8 || alue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfOutput[o | | | | +Epoch 8 || utput]/NNCFLinear[de | | | | +Epoch 8 || nse]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.895 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertIntermediate[inte | | | | +Epoch 8 || rmediate]/NNCFLinear | | | | +Epoch 8 || [dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertOutput[output]/NN | | | | +Epoch 8 || CFLinear[dense]/line | | | | +Epoch 8 || ar_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 | +Epoch 8 |Statistics of the magnitude sparsity algorithm: +Epoch 8 |+----------------------------------------------------------------------+-------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+======================================================================+=======+ +Epoch 8 || A target level of the sparsity for the algorithm for the current | 0.799 | +Epoch 8 || epoch | | +Epoch 8 |+----------------------------------------------------------------------+-------+ +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || Layer's name | Sparsity threshold | +Epoch 8 |+=========================================================+====================+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 8 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 8 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 8 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 8 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 9 |+--------------------------------+-------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+================================+=======+ +Epoch 9 || Ratio of enabled quantizations | 100 | +Epoch 9 |+--------------------------------+-------+ +Epoch 9 | +Epoch 9 |Statistics of the quantization share: +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+==================================+====================+ +Epoch 9 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 | +Epoch 9 |Statistics of the bitwidth distribution: +Epoch 9 |+--------------+---------------------+--------------------+--------------------+ +Epoch 9 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 9 || | WQs | Placed AQs | Qs | +Epoch 9 |+==============+=====================+====================+====================+ +Epoch 9 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 9 || | | | 173) | +Epoch 9 |+--------------+---------------------+--------------------+--------------------+ +Epoch 9 | +Epoch 9 |Statistics of the sparsified model: +Epoch 9 |+-----------------------------------------+-------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+=========================================+=======+ +Epoch 9 || Sparsity level of the whole model | 0.621 | +Epoch 9 |+-----------------------------------------+-------+ +Epoch 9 || Sparsity level of all sparsified layers | 0.800 | +Epoch 9 |+-----------------------------------------+-------+ +Epoch 9 | +Epoch 9 |Statistics by sparsified layers: +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 9 |+======================+================+================+=====================+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[q | | | | +Epoch 9 || uery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[k | | | | +Epoch 9 || ey]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[v | | | | +Epoch 9 || alue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfOutput[o | | | | +Epoch 9 || utput]/NNCFLinear[de | | | | +Epoch 9 || nse]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertIntermediate[inte | | | | +Epoch 9 || rmediate]/NNCFLinear | | | | +Epoch 9 || [dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertOutput[output]/NN | | | | +Epoch 9 || CFLinear[dense]/line | | | | +Epoch 9 || ar_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[q | | | | +Epoch 9 || uery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[k | | | | +Epoch 9 || ey]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[v | | | | +Epoch 9 || alue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfOutput[o | | | | +Epoch 9 || utput]/NNCFLinear[de | | | | +Epoch 9 || nse]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertIntermediate[inte | | | | +Epoch 9 || rmediate]/NNCFLinear | | | | +Epoch 9 || [dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertOutput[output]/NN | | | | +Epoch 9 || CFLinear[dense]/line | | | | +Epoch 9 || ar_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 | +Epoch 9 |Statistics of the magnitude sparsity algorithm: +Epoch 9 |+----------------------------------------------------------------------+-------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+======================================================================+=======+ +Epoch 9 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 9 || epoch | | +Epoch 9 |+----------------------------------------------------------------------+-------+ +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || Layer's name | Sparsity threshold | +Epoch 9 |+=========================================================+====================+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 9 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 9 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 9 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 9 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 10 |+--------------------------------+-------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+================================+=======+ +Epoch 10 || Ratio of enabled quantizations | 100 | +Epoch 10 |+--------------------------------+-------+ +Epoch 10 | +Epoch 10 |Statistics of the quantization share: +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+==================================+====================+ +Epoch 10 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 | +Epoch 10 |Statistics of the bitwidth distribution: +Epoch 10 |+--------------+---------------------+--------------------+--------------------+ +Epoch 10 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 10 || | WQs | Placed AQs | Qs | +Epoch 10 |+==============+=====================+====================+====================+ +Epoch 10 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 10 || | | | 173) | +Epoch 10 |+--------------+---------------------+--------------------+--------------------+ +Epoch 10 | +Epoch 10 |Statistics of the sparsified model: +Epoch 10 |+-----------------------------------------+-------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+=========================================+=======+ +Epoch 10 || Sparsity level of the whole model | 0.621 | +Epoch 10 |+-----------------------------------------+-------+ +Epoch 10 || Sparsity level of all sparsified layers | 0.800 | +Epoch 10 |+-----------------------------------------+-------+ +Epoch 10 | +Epoch 10 |Statistics by sparsified layers: +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 10 |+======================+================+================+=====================+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[q | | | | +Epoch 10 || uery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[k | | | | +Epoch 10 || ey]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[v | | | | +Epoch 10 || alue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfOutput[o | | | | +Epoch 10 || utput]/NNCFLinear[de | | | | +Epoch 10 || nse]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertIntermediate[inte | | | | +Epoch 10 || rmediate]/NNCFLinear | | | | +Epoch 10 || [dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertOutput[output]/NN | | | | +Epoch 10 || CFLinear[dense]/line | | | | +Epoch 10 || ar_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[q | | | | +Epoch 10 || uery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[k | | | | +Epoch 10 || ey]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[v | | | | +Epoch 10 || alue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfOutput[o | | | | +Epoch 10 || utput]/NNCFLinear[de | | | | +Epoch 10 || nse]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertIntermediate[inte | | | | +Epoch 10 || rmediate]/NNCFLinear | | | | +Epoch 10 || [dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertOutput[output]/NN | | | | +Epoch 10 || CFLinear[dense]/line | | | | +Epoch 10 || ar_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 | +Epoch 10 |Statistics of the magnitude sparsity algorithm: +Epoch 10 |+----------------------------------------------------------------------+-------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+======================================================================+=======+ +Epoch 10 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 10 || epoch | | +Epoch 10 |+----------------------------------------------------------------------+-------+ +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || Layer's name | Sparsity threshold | +Epoch 10 |+=========================================================+====================+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 10 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 10 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 10 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 10 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 11 |+--------------------------------+-------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+================================+=======+ +Epoch 11 || Ratio of enabled quantizations | 100 | +Epoch 11 |+--------------------------------+-------+ +Epoch 11 | +Epoch 11 |Statistics of the quantization share: +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+==================================+====================+ +Epoch 11 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 | +Epoch 11 |Statistics of the bitwidth distribution: +Epoch 11 |+--------------+---------------------+--------------------+--------------------+ +Epoch 11 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 11 || | WQs | Placed AQs | Qs | +Epoch 11 |+==============+=====================+====================+====================+ +Epoch 11 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 11 || | | | 173) | +Epoch 11 |+--------------+---------------------+--------------------+--------------------+ +Epoch 11 | +Epoch 11 |Statistics of the sparsified model: +Epoch 11 |+-----------------------------------------+-------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+=========================================+=======+ +Epoch 11 || Sparsity level of the whole model | 0.621 | +Epoch 11 |+-----------------------------------------+-------+ +Epoch 11 || Sparsity level of all sparsified layers | 0.800 | +Epoch 11 |+-----------------------------------------+-------+ +Epoch 11 | +Epoch 11 |Statistics by sparsified layers: +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 11 |+======================+================+================+=====================+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[q | | | | +Epoch 11 || uery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[k | | | | +Epoch 11 || ey]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[v | | | | +Epoch 11 || alue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfOutput[o | | | | +Epoch 11 || utput]/NNCFLinear[de | | | | +Epoch 11 || nse]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertIntermediate[inte | | | | +Epoch 11 || rmediate]/NNCFLinear | | | | +Epoch 11 || [dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertOutput[output]/NN | | | | +Epoch 11 || CFLinear[dense]/line | | | | +Epoch 11 || ar_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[q | | | | +Epoch 11 || uery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[k | | | | +Epoch 11 || ey]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[v | | | | +Epoch 11 || alue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfOutput[o | | | | +Epoch 11 || utput]/NNCFLinear[de | | | | +Epoch 11 || nse]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertIntermediate[inte | | | | +Epoch 11 || rmediate]/NNCFLinear | | | | +Epoch 11 || [dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertOutput[output]/NN | | | | +Epoch 11 || CFLinear[dense]/line | | | | +Epoch 11 || ar_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 | +Epoch 11 |Statistics of the magnitude sparsity algorithm: +Epoch 11 |+----------------------------------------------------------------------+-------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+======================================================================+=======+ +Epoch 11 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 11 || epoch | | +Epoch 11 |+----------------------------------------------------------------------+-------+ +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || Layer's name | Sparsity threshold | +Epoch 11 |+=========================================================+====================+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 11 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 11 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 11 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 11 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 12 |+--------------------------------+-------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+================================+=======+ +Epoch 12 || Ratio of enabled quantizations | 100 | +Epoch 12 |+--------------------------------+-------+ +Epoch 12 | +Epoch 12 |Statistics of the quantization share: +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+==================================+====================+ +Epoch 12 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 | +Epoch 12 |Statistics of the bitwidth distribution: +Epoch 12 |+--------------+---------------------+--------------------+--------------------+ +Epoch 12 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 12 || | WQs | Placed AQs | Qs | +Epoch 12 |+==============+=====================+====================+====================+ +Epoch 12 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 12 || | | | 173) | +Epoch 12 |+--------------+---------------------+--------------------+--------------------+ +Epoch 12 | +Epoch 12 |Statistics of the sparsified model: +Epoch 12 |+-----------------------------------------+-------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+=========================================+=======+ +Epoch 12 || Sparsity level of the whole model | 0.621 | +Epoch 12 |+-----------------------------------------+-------+ +Epoch 12 || Sparsity level of all sparsified layers | 0.800 | +Epoch 12 |+-----------------------------------------+-------+ +Epoch 12 | +Epoch 12 |Statistics by sparsified layers: +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 12 |+======================+================+================+=====================+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[q | | | | +Epoch 12 || uery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[k | | | | +Epoch 12 || ey]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[v | | | | +Epoch 12 || alue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfOutput[o | | | | +Epoch 12 || utput]/NNCFLinear[de | | | | +Epoch 12 || nse]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertIntermediate[inte | | | | +Epoch 12 || rmediate]/NNCFLinear | | | | +Epoch 12 || [dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertOutput[output]/NN | | | | +Epoch 12 || CFLinear[dense]/line | | | | +Epoch 12 || ar_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[q | | | | +Epoch 12 || uery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[k | | | | +Epoch 12 || ey]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[v | | | | +Epoch 12 || alue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfOutput[o | | | | +Epoch 12 || utput]/NNCFLinear[de | | | | +Epoch 12 || nse]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertIntermediate[inte | | | | +Epoch 12 || rmediate]/NNCFLinear | | | | +Epoch 12 || [dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertOutput[output]/NN | | | | +Epoch 12 || CFLinear[dense]/line | | | | +Epoch 12 || ar_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 | +Epoch 12 |Statistics of the magnitude sparsity algorithm: +Epoch 12 |+----------------------------------------------------------------------+-------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+======================================================================+=======+ +Epoch 12 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 12 || epoch | | +Epoch 12 |+----------------------------------------------------------------------+-------+ +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || Layer's name | Sparsity threshold | +Epoch 12 |+=========================================================+====================+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 12 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 12 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 12 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 12 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 13 |+--------------------------------+-------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+================================+=======+ +Epoch 13 || Ratio of enabled quantizations | 100 | +Epoch 13 |+--------------------------------+-------+ +Epoch 13 | +Epoch 13 |Statistics of the quantization share: +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+==================================+====================+ +Epoch 13 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 | +Epoch 13 |Statistics of the bitwidth distribution: +Epoch 13 |+--------------+---------------------+--------------------+--------------------+ +Epoch 13 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 13 || | WQs | Placed AQs | Qs | +Epoch 13 |+==============+=====================+====================+====================+ +Epoch 13 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 13 || | | | 173) | +Epoch 13 |+--------------+---------------------+--------------------+--------------------+ +Epoch 13 | +Epoch 13 |Statistics of the sparsified model: +Epoch 13 |+-----------------------------------------+-------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+=========================================+=======+ +Epoch 13 || Sparsity level of the whole model | 0.621 | +Epoch 13 |+-----------------------------------------+-------+ +Epoch 13 || Sparsity level of all sparsified layers | 0.800 | +Epoch 13 |+-----------------------------------------+-------+ +Epoch 13 | +Epoch 13 |Statistics by sparsified layers: +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 13 |+======================+================+================+=====================+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[q | | | | +Epoch 13 || uery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[k | | | | +Epoch 13 || ey]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[v | | | | +Epoch 13 || alue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfOutput[o | | | | +Epoch 13 || utput]/NNCFLinear[de | | | | +Epoch 13 || nse]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertIntermediate[inte | | | | +Epoch 13 || rmediate]/NNCFLinear | | | | +Epoch 13 || [dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertOutput[output]/NN | | | | +Epoch 13 || CFLinear[dense]/line | | | | +Epoch 13 || ar_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[q | | | | +Epoch 13 || uery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[k | | | | +Epoch 13 || ey]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[v | | | | +Epoch 13 || alue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfOutput[o | | | | +Epoch 13 || utput]/NNCFLinear[de | | | | +Epoch 13 || nse]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertIntermediate[inte | | | | +Epoch 13 || rmediate]/NNCFLinear | | | | +Epoch 13 || [dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertOutput[output]/NN | | | | +Epoch 13 || CFLinear[dense]/line | | | | +Epoch 13 || ar_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 | +Epoch 13 |Statistics of the magnitude sparsity algorithm: +Epoch 13 |+----------------------------------------------------------------------+-------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+======================================================================+=======+ +Epoch 13 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 13 || epoch | | +Epoch 13 |+----------------------------------------------------------------------+-------+ +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || Layer's name | Sparsity threshold | +Epoch 13 |+=========================================================+====================+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 13 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 13 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 13 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 13 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 14 |+--------------------------------+-------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+================================+=======+ +Epoch 14 || Ratio of enabled quantizations | 100 | +Epoch 14 |+--------------------------------+-------+ +Epoch 14 | +Epoch 14 |Statistics of the quantization share: +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+==================================+====================+ +Epoch 14 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 | +Epoch 14 |Statistics of the bitwidth distribution: +Epoch 14 |+--------------+---------------------+--------------------+--------------------+ +Epoch 14 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 14 || | WQs | Placed AQs | Qs | +Epoch 14 |+==============+=====================+====================+====================+ +Epoch 14 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 14 || | | | 173) | +Epoch 14 |+--------------+---------------------+--------------------+--------------------+ +Epoch 14 | +Epoch 14 |Statistics of the sparsified model: +Epoch 14 |+-----------------------------------------+-------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+=========================================+=======+ +Epoch 14 || Sparsity level of the whole model | 0.621 | +Epoch 14 |+-----------------------------------------+-------+ +Epoch 14 || Sparsity level of all sparsified layers | 0.800 | +Epoch 14 |+-----------------------------------------+-------+ +Epoch 14 | +Epoch 14 |Statistics by sparsified layers: +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 14 |+======================+================+================+=====================+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[q | | | | +Epoch 14 || uery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[k | | | | +Epoch 14 || ey]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[v | | | | +Epoch 14 || alue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfOutput[o | | | | +Epoch 14 || utput]/NNCFLinear[de | | | | +Epoch 14 || nse]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertIntermediate[inte | | | | +Epoch 14 || rmediate]/NNCFLinear | | | | +Epoch 14 || [dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertOutput[output]/NN | | | | +Epoch 14 || CFLinear[dense]/line | | | | +Epoch 14 || ar_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[q | | | | +Epoch 14 || uery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[k | | | | +Epoch 14 || ey]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[v | | | | +Epoch 14 || alue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfOutput[o | | | | +Epoch 14 || utput]/NNCFLinear[de | | | | +Epoch 14 || nse]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertIntermediate[inte | | | | +Epoch 14 || rmediate]/NNCFLinear | | | | +Epoch 14 || [dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertOutput[output]/NN | | | | +Epoch 14 || CFLinear[dense]/line | | | | +Epoch 14 || ar_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 | +Epoch 14 |Statistics of the magnitude sparsity algorithm: +Epoch 14 |+----------------------------------------------------------------------+-------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+======================================================================+=======+ +Epoch 14 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 14 || epoch | | +Epoch 14 |+----------------------------------------------------------------------+-------+ +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || Layer's name | Sparsity threshold | +Epoch 14 |+=========================================================+====================+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 14 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 14 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 14 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 14 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 15 |+--------------------------------+-------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+================================+=======+ +Epoch 15 || Ratio of enabled quantizations | 100 | +Epoch 15 |+--------------------------------+-------+ +Epoch 15 | +Epoch 15 |Statistics of the quantization share: +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+==================================+====================+ +Epoch 15 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 | +Epoch 15 |Statistics of the bitwidth distribution: +Epoch 15 |+--------------+---------------------+--------------------+--------------------+ +Epoch 15 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 15 || | WQs | Placed AQs | Qs | +Epoch 15 |+==============+=====================+====================+====================+ +Epoch 15 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 15 || | | | 173) | +Epoch 15 |+--------------+---------------------+--------------------+--------------------+ +Epoch 15 | +Epoch 15 |Statistics of the sparsified model: +Epoch 15 |+-----------------------------------------+-------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+=========================================+=======+ +Epoch 15 || Sparsity level of the whole model | 0.621 | +Epoch 15 |+-----------------------------------------+-------+ +Epoch 15 || Sparsity level of all sparsified layers | 0.800 | +Epoch 15 |+-----------------------------------------+-------+ +Epoch 15 | +Epoch 15 |Statistics by sparsified layers: +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 15 |+======================+================+================+=====================+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[q | | | | +Epoch 15 || uery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[k | | | | +Epoch 15 || ey]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[v | | | | +Epoch 15 || alue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfOutput[o | | | | +Epoch 15 || utput]/NNCFLinear[de | | | | +Epoch 15 || nse]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertIntermediate[inte | | | | +Epoch 15 || rmediate]/NNCFLinear | | | | +Epoch 15 || [dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertOutput[output]/NN | | | | +Epoch 15 || CFLinear[dense]/line | | | | +Epoch 15 || ar_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[q | | | | +Epoch 15 || uery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[k | | | | +Epoch 15 || ey]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[v | | | | +Epoch 15 || alue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfOutput[o | | | | +Epoch 15 || utput]/NNCFLinear[de | | | | +Epoch 15 || nse]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertIntermediate[inte | | | | +Epoch 15 || rmediate]/NNCFLinear | | | | +Epoch 15 || [dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertOutput[output]/NN | | | | +Epoch 15 || CFLinear[dense]/line | | | | +Epoch 15 || ar_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 | +Epoch 15 |Statistics of the magnitude sparsity algorithm: +Epoch 15 |+----------------------------------------------------------------------+-------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+======================================================================+=======+ +Epoch 15 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 15 || epoch | | +Epoch 15 |+----------------------------------------------------------------------+-------+ +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || Layer's name | Sparsity threshold | +Epoch 15 |+=========================================================+====================+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 15 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 15 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 15 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 15 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 16 |+--------------------------------+-------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+================================+=======+ +Epoch 16 || Ratio of enabled quantizations | 100 | +Epoch 16 |+--------------------------------+-------+ +Epoch 16 | +Epoch 16 |Statistics of the quantization share: +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+==================================+====================+ +Epoch 16 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 | +Epoch 16 |Statistics of the bitwidth distribution: +Epoch 16 |+--------------+---------------------+--------------------+--------------------+ +Epoch 16 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 16 || | WQs | Placed AQs | Qs | +Epoch 16 |+==============+=====================+====================+====================+ +Epoch 16 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 16 || | | | 173) | +Epoch 16 |+--------------+---------------------+--------------------+--------------------+ +Epoch 16 | +Epoch 16 |Statistics of the sparsified model: +Epoch 16 |+-----------------------------------------+-------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+=========================================+=======+ +Epoch 16 || Sparsity level of the whole model | 0.621 | +Epoch 16 |+-----------------------------------------+-------+ +Epoch 16 || Sparsity level of all sparsified layers | 0.800 | +Epoch 16 |+-----------------------------------------+-------+ +Epoch 16 | +Epoch 16 |Statistics by sparsified layers: +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 16 |+======================+================+================+=====================+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.905 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.903 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.897 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.900 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.904 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[q | | | | +Epoch 16 || uery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[k | | | | +Epoch 16 || ey]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[v | | | | +Epoch 16 || alue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfOutput[o | | | | +Epoch 16 || utput]/NNCFLinear[de | | | | +Epoch 16 || nse]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.898 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertIntermediate[inte | | | | +Epoch 16 || rmediate]/NNCFLinear | | | | +Epoch 16 || [dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.902 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertOutput[output]/NN | | | | +Epoch 16 || CFLinear[dense]/line | | | | +Epoch 16 || ar_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[q | | | | +Epoch 16 || uery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[k | | | | +Epoch 16 || ey]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[v | | | | +Epoch 16 || alue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfOutput[o | | | | +Epoch 16 || utput]/NNCFLinear[de | | | | +Epoch 16 || nse]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertIntermediate[inte | | | | +Epoch 16 || rmediate]/NNCFLinear | | | | +Epoch 16 || [dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertOutput[output]/NN | | | | +Epoch 16 || CFLinear[dense]/line | | | | +Epoch 16 || ar_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 | +Epoch 16 |Statistics of the magnitude sparsity algorithm: +Epoch 16 |+----------------------------------------------------------------------+-------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+======================================================================+=======+ +Epoch 16 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 16 || epoch | | +Epoch 16 |+----------------------------------------------------------------------+-------+ +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || Layer's name | Sparsity threshold | +Epoch 16 |+=========================================================+====================+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 16 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 16 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 16 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 16 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+