---
language:
- en
tags:
- generated_from_trainer
datasets:
- glue
metrics:
- accuracy
model-index:
- name: yujiepan/bert-base-uncased-sst2-int8-unstructured80-30epoch
  results:
  - task:
      name: Text Classification
      type: text-classification
    dataset:
      name: GLUE SST2
      type: glue
      config: sst2
      split: validation
      args: sst2
    metrics:
    - name: Accuracy
      type: accuracy
      value: 0.9139908256880734
---

# Joint magnitude pruning, quantization and distillation on BERT-base/SST-2

This model applies unstructured magnitude pruning, quantization and knowledge distillation jointly while fine-tuning on the GLUE SST-2 dataset.
It achieves the following results on the evaluation set:
- Loss: 0.41159623861312866
- Accuracy: 0.9139908256880734

## Setup

```
conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia
git clone https://github.com/yujiepan-work/optimum-intel.git
cd optimum-intel
git checkout -b "magnitude-pruning" 01927af543eaea8678671bf8f4eb78fdb29f8930
pip install -e .[openvino,nncf]

cd examples/openvino/text-classification/
pip install -r requirements.txt
pip install wandb  # optional
```

## NNCF config

Create a JSON file for the NNCF compression configuration:

```
[
    {
        "algorithm": "quantization",
        "preset": "mixed",
        "overflow_fix": "disable",
        "initializer": {
            "range": {
                "num_init_samples": 300,
                "type": "mean_min_max"
            },
            "batchnorm_adaptation": {
                "num_bn_adaptation_samples": 0
            }
        },
        "scope_overrides": {
            "activations": {
                "{re}.*matmul_0": {
                    "mode": "symmetric"
                }
            }
        },
        "ignored_scopes": [
            "{re}.*Embeddings.*",
            "{re}.*__add___[0-1]",
            "{re}.*layer_norm_0",
            "{re}.*matmul_1",
            "{re}.*__truediv__*"
        ]
    },
    {
        "algorithm": "magnitude_sparsity",
        "ignored_scopes": [
            "{re}.*NNCFEmbedding.*",
            "{re}.*LayerNorm.*",
            "{re}.*pooler.*",
            "{re}.*classifier.*"
        ],
        "sparsity_init": 0.0,
        "params": {
            "power": 3,
            "schedule": "polynomial",
            "sparsity_freeze_epoch": 10,
            "sparsity_target": 0.8,
            "sparsity_target_epoch": 9,
            "steps_per_epoch": 2105,
            "update_per_optimizer_step": true
        }
    }
]
```
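
With `"schedule": "polynomial"` and `"update_per_optimizer_step": true`, the sparsity level ramps from `sparsity_init` (0.0) to `sparsity_target` (0.8) over the first 9 epochs, and the masks are frozen from `sparsity_freeze_epoch` (10) onward. A minimal sketch of the polynomial (cubic) ramp; NNCF's internal scheduler may differ in details:

```python
def sparsity_at_step(step: int,
                     sparsity_init: float = 0.0,
                     sparsity_target: float = 0.8,
                     sparsity_target_epoch: int = 9,
                     steps_per_epoch: int = 2105,
                     power: int = 3) -> float:
    # Fraction of the ramp completed, clipped at 1.0 once the target epoch is reached.
    progress = min(step / (sparsity_target_epoch * steps_per_epoch), 1.0)
    # Polynomial decay of the remaining (init - target) gap.
    return sparsity_target + (sparsity_init - sparsity_target) * (1.0 - progress) ** power

print(round(sparsity_at_step(9000), 3))  # 0.684, roughly half-way through the ramp
print(sparsity_at_step(9 * 2105))        # 0.8, target reached
```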

## Run

We train on a single GPU. Note that the `--cosine_cycle_ratios` below (8+6+4+4+4+4) sum to the 30 training epochs, presumably giving each cosine restart cycle a fixed span in epochs.

```
NNCFCFG=/path/to/nncf/config
python run_glue.py \
    --lr_scheduler_type cosine_with_restarts \
    --cosine_cycle_ratios 8,6,4,4,4,4 \
    --cosine_cycle_decays 1,1,1,1,1,1 \
    --save_best_model_after_epoch -1 \
    --save_best_model_after_sparsity 0.7999 \
    --model_name_or_path textattack/bert-base-uncased-SST-2 \
    --teacher_model_or_path yoshitomo-matsubara/bert-large-uncased-sst2 \
    --distillation_temperature 2 \
    --task_name sst2 \
    --nncf_compression_config $NNCFCFG \
    --distillation_weight 0.95 \
    --output_dir /tmp/bert-base-uncased-sst2-int8-unstructured80-30epoch \
    --run_name bert-base-uncased-sst2-int8-unstructured80-30epoch \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --max_seq_length 128 \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 32 \
    --learning_rate 5e-05 \
    --optim adamw_torch \
    --num_train_epochs 30 \
    --logging_steps 1 \
    --evaluation_strategy steps \
    --eval_steps 250 \
    --save_strategy steps \
    --save_steps 250 \
    --save_total_limit 1 \
    --fp16 \
    --seed 1
```
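
The `--teacher_model_or_path`, `--distillation_weight` and `--distillation_temperature` flags control knowledge distillation from a BERT-large teacher: the student's loss is dominated (weight 0.95) by a temperature-softened match to the teacher's logits, with the remainder coming from the usual label cross-entropy. A minimal sketch of such a joint objective, assuming the common KD formulation (the fork's `run_glue.py` may differ in details):

```python
import torch
import torch.nn.functional as F

def distillation_loss(student_logits: torch.Tensor,
                      teacher_logits: torch.Tensor,
                      labels: torch.Tensor,
                      weight: float = 0.95,
                      temperature: float = 2.0) -> torch.Tensor:
    # KL divergence between temperature-softened distributions,
    # scaled by T^2 to keep gradient magnitudes comparable.
    kd = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * temperature ** 2
    # Standard cross-entropy against the ground-truth labels.
    ce = F.cross_entropy(student_logits, labels)
    return weight * kd + (1.0 - weight) * ce
```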

The best model is stored in the `best_model` folder of the output directory.

### Framework versions

- Transformers 4.26.0
- PyTorch 1.13.1+cu116
- Datasets 2.8.0
- Tokenizers 0.13.2

For a full description of the environment, please refer to `pip-requirements.txt` and `conda-requirements.txt`.
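
For inference, the compressed model can be loaded through `optimum-intel`. A minimal sketch, assuming this repository contains the exported OpenVINO IR files:

```python
from transformers import AutoTokenizer, pipeline
from optimum.intel.openvino import OVModelForSequenceClassification

model_id = "yujiepan/bert-base-uncased-sst2-int8-unstructured80-30epoch"
model = OVModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("a charming and affecting story"))
```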