test_compressor.py 12.4 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import functools
import math
from unittest import TestCase, main

import numpy as np
import tensorflow as tf
import torch
import torch.nn.functional as F

import nni.compression.torch as torch_compressor

if tf.__version__ >= '2.0':
    import nni.compression.tensorflow as tf_compressor

Tang Lang's avatar
Tang Lang committed
15

16
def get_tf_model():
    """Build and compile a small Keras CNN classifier for 28x28x1 inputs."""
    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        layers.Conv2D(filters=5, kernel_size=7, input_shape=[28, 28, 1], activation='relu', padding="SAME"),
        layers.MaxPooling2D(pool_size=2),
        layers.Conv2D(filters=10, kernel_size=3, activation='relu', padding="SAME"),
        layers.MaxPooling2D(pool_size=2),
        layers.Flatten(),
        layers.Dense(units=128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(units=10, activation='softmax'),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.SGD(lr=1e-3),
        metrics=["accuracy"],
    )
    return model
31

Tang Lang's avatar
Tang Lang committed
32

33
class TorchModel(torch.nn.Module):
    """Small CNN used as the pruning/quantization target in these tests.

    Layout: conv1 -> bn1 -> pool -> conv2 -> bn2 -> pool -> fc1 -> fc2,
    mapping a 1x28x28 input to 10 log-probabilities.
    """

    def __init__(self):
        super().__init__()
        # Attribute names (conv1, bn1, ...) are referenced as op_names in
        # the test configs below, so they must not change.
        self.conv1 = torch.nn.Conv2d(1, 5, 5, 1)
        self.bn1 = torch.nn.BatchNorm2d(5)
        self.conv2 = torch.nn.Conv2d(5, 10, 5, 1)
        self.bn2 = torch.nn.BatchNorm2d(10)
        self.fc1 = torch.nn.Linear(4 * 4 * 10, 100)
        self.fc2 = torch.nn.Linear(100, 10)

    def forward(self, x):
        # Two conv/bn/relu/pool stages, then two fully connected layers.
        out = self.conv1(x)
        out = F.max_pool2d(F.relu(self.bn1(out)), 2, 2)
        out = self.conv2(out)
        out = F.max_pool2d(F.relu(self.bn2(out)), 2, 2)
        out = out.view(-1, 4 * 4 * 10)
        out = self.fc2(F.relu(self.fc1(out)))
        return F.log_softmax(out, dim=1)

Tang Lang's avatar
Tang Lang committed
53

54
def tf2(func):
    """Decorator that runs *func* only when TensorFlow >= 2.0 is available.

    Uses functools.wraps so the wrapped test keeps its original __name__ and
    docstring (otherwise every decorated test reports as 'test_tf2_func').

    NOTE(review): the version check is a plain string comparison, mirroring
    the module-level guard; it would misorder hypothetical versions such as
    '10.0' — confirm before relying on it for major versions >= 10.
    """
    @functools.wraps(func)
    def test_tf2_func(*args):
        if tf.__version__ >= '2.0':
            func(*args)
    return test_tf2_func
60

Tang Lang's avatar
Tang Lang committed
61

chicm-ms's avatar
chicm-ms committed
62
# Synthetic conv weights for the FPGM filter-pruner tests: ten 5x3x3
# filters where filter i holds the constant value i + 1.
w = np.array([np.full((5, 3, 3), i + 1) for i in range(10)])
64

Tang Lang's avatar
Tang Lang committed
65

66
class CompressorTestCase(TestCase):
    """End-to-end sanity tests for NNI pruners and quantizers."""

    @staticmethod
    def _qat_config():
        # Shared QAT config: 8-bit weight quantization for Conv2d/Linear and
        # 8-bit output quantization for ReLU, active from step 0.
        return [{
            'quant_types': ['weight'],
            'quant_bits': 8,
            'op_types': ['Conv2d', 'Linear']
        }, {
            'quant_types': ['output'],
            'quant_bits': 8,
            'quant_start_step': 0,
            'op_types': ['ReLU']
        }]

    def test_torch_quantizer_modules_detection(self):
        # The quantizer must detect exactly the modules selected by the config.
        model = TorchModel()
        model.relu = torch.nn.ReLU()
        config_list = self._qat_config()
        quantizer = torch_compressor.QAT_Quantizer(model, config_list)
        quantizer.compress()
        detected = [entry[0].name for entry in quantizer.get_modules_to_compress()]
        for name in ("conv1", "conv2", "fc1", "fc2", "relu"):
            assert name in detected
        assert len(detected) == 5

    def test_torch_level_pruner(self):
        # LevelPruner should run end-to-end on the default op set.
        cfg = [{'sparsity': 0.8, 'op_types': ['default']}]
        torch_compressor.LevelPruner(TorchModel(), cfg).compress()

    @tf2
    def test_tf_level_pruner(self):
        cfg = [{'sparsity': 0.8, 'op_types': ['default']}]
        tf_compressor.LevelPruner(get_tf_model(), cfg).compress()

    def test_torch_naive_quantizer(self):
        cfg = [{
            'quant_types': ['weight'],
            'quant_bits': {
                'weight': 8,
            },
            'op_types': ['Conv2d', 'Linear']
        }]
        torch_compressor.NaiveQuantizer(TorchModel(), cfg).compress()

    @tf2
    def test_tf_naive_quantizer(self):
        tf_compressor.NaiveQuantizer(get_tf_model(), [{'op_types': ['default']}]).compress()

    def test_torch_fpgm_pruner(self):
        """FPGM masks the filters closest to the geometric median.

        With the module-level weights ``w`` (filter i is the constant i + 1),
        w[4] and w[5] minimise the total geometric distance by the definition
        of the geometric median in "Filter Pruning via Geometric Median for
        Deep Convolutional Neural Networks Acceleration"
        (https://arxiv.org/pdf/1811.00250.pdf).

        Sparsity 0.2 therefore masks out w[4] and w[5]; sparsity 0.6 masks
        out w[2]..w[7].  Every surviving 5x3x3 filter mask sums to 45.
        """
        model = TorchModel()
        config_list = [{'sparsity': 0.2, 'op_types': ['Conv2d']}, {'sparsity': 0.6, 'op_types': ['Conv2d']}]
        pruner = torch_compressor.FPGMPruner(model, config_list)

        model.conv2.weight.data = torch.tensor(w).float()
        layer = torch_compressor.compressor.LayerInfo('conv2', model.conv2)

        masks = pruner.calc_mask(layer, config_list[0], if_calculated=torch.tensor(0))
        kept = torch.sum(masks['weight'], (1, 2, 3)).numpy()
        assert all(kept == np.array([45., 45., 45., 45., 0., 0., 45., 45., 45., 45.]))

        model.conv2.weight.data = torch.tensor(w).float()
        masks = pruner.calc_mask(layer, config_list[1], if_calculated=torch.tensor(0))
        kept = torch.sum(masks['weight'], (1, 2, 3)).numpy()
        assert all(kept == np.array([45., 45., 0., 0., 0., 0., 0., 0., 45., 45.]))

    @tf2
    def test_tf_fpgm_pruner(self):
        # Same scenario as the torch FPGM test, against the Keras model.
        model = get_tf_model()
        config_list = [{'sparsity': 0.2, 'op_types': ['Conv2D']}, {'sparsity': 0.6, 'op_types': ['Conv2D']}]

        pruner = tf_compressor.FPGMPruner(model, config_list)
        # Install the synthetic weights w into the second conv layer,
        # transposing from torch's OIHW layout to the Keras kernel layout.
        weights = model.layers[2].weights
        weights[0] = np.array(w).astype(np.float32).transpose([2, 3, 0, 1]).transpose([0, 1, 3, 2])
        model.layers[2].set_weights([weights[0], weights[1].numpy()])

        layer = tf_compressor.compressor.LayerInfo(model.layers[2])

        masks = pruner.calc_mask(layer, config_list[0]).numpy()
        masks = masks.reshape((-1, masks.shape[-1])).transpose([1, 0])
        assert all(masks.sum((1)) == np.array([45., 45., 45., 45., 0., 0., 45., 45., 45., 45.]))

        model.layers[2].set_weights([weights[0], weights[1].numpy()])
        masks = pruner.calc_mask(layer, config_list[1]).numpy()
        masks = masks.reshape((-1, masks.shape[-1])).transpose([1, 0])
        assert all(masks.sum((1)) == np.array([45., 45., 0., 0., 0., 0., 0., 0., 45., 45.]))

    def test_torch_l1filter_pruner(self):
        """L1FilterPruner drops the filters with the smallest L1 norm.

        See "Pruning Filters for Efficient ConvNets"
        (https://arxiv.org/abs/1608.08710).

        Filters hold the constants 0..4, so sparsity 0.2 masks filter 0
        (mask sums [0., 27., 27., 27., 27.]) and sparsity 0.6 masks
        filters 0, 1 and 2 (mask sums [0., 0., 0., 27., 27.]).
        """
        w = np.array([np.ones((3, 3, 3)) * i for i in range(5)])
        model = TorchModel()
        config_list = [{'sparsity': 0.2, 'op_types': ['Conv2d'], 'op_names': ['conv1']},
                       {'sparsity': 0.6, 'op_types': ['Conv2d'], 'op_names': ['conv2']}]
        pruner = torch_compressor.L1FilterPruner(model, config_list)

        model.conv1.weight.data = torch.tensor(w).float()
        model.conv2.weight.data = torch.tensor(w).float()
        layer1 = torch_compressor.compressor.LayerInfo('conv1', model.conv1)
        mask1 = pruner.calc_mask(layer1, config_list[0], if_calculated=torch.tensor(0))
        layer2 = torch_compressor.compressor.LayerInfo('conv2', model.conv2)
        mask2 = pruner.calc_mask(layer2, config_list[1], if_calculated=torch.tensor(0))
        assert all(torch.sum(mask1['weight'], (1, 2, 3)).numpy() == np.array([0., 27., 27., 27., 27.]))
        assert all(torch.sum(mask2['weight'], (1, 2, 3)).numpy() == np.array([0., 0., 0., 27., 27.]))

    def test_torch_slim_pruner(self):
        """SlimPruner removes the BN scale factors with the smallest L1 norm.

        See "Learning Efficient Convolutional Networks through Network
        Slimming" (https://arxiv.org/pdf/1708.06519.pdf).

        The scale factors are 0..4 (negated on bn2 in the first round), so
        sparsity 0.2 masks channel 0 and sparsity 0.6 masks channels 0, 1
        and 2 — for both weight and bias masks.
        """
        w = np.array([0, 1, 2, 3, 4])
        model = TorchModel()
        config_list = [{'sparsity': 0.2, 'op_types': ['BatchNorm2d']}]
        model.bn1.weight.data = torch.tensor(w).float()
        model.bn2.weight.data = torch.tensor(-w).float()
        pruner = torch_compressor.SlimPruner(model, config_list)

        layer1 = torch_compressor.compressor.LayerInfo('bn1', model.bn1)
        mask1 = pruner.calc_mask(layer1, config_list[0], if_calculated=torch.tensor(0))
        layer2 = torch_compressor.compressor.LayerInfo('bn2', model.bn2)
        mask2 = pruner.calc_mask(layer2, config_list[0], if_calculated=torch.tensor(0))
        expected = np.array([0., 1., 1., 1., 1.])
        assert all(mask1['weight'].numpy() == expected)
        assert all(mask2['weight'].numpy() == expected)
        assert all(mask1['bias'].numpy() == expected)
        assert all(mask2['bias'].numpy() == expected)

        config_list = [{'sparsity': 0.6, 'op_types': ['BatchNorm2d']}]
        model.bn1.weight.data = torch.tensor(w).float()
        model.bn2.weight.data = torch.tensor(w).float()
        pruner = torch_compressor.SlimPruner(model, config_list)

        layer1 = torch_compressor.compressor.LayerInfo('bn1', model.bn1)
        mask1 = pruner.calc_mask(layer1, config_list[0], if_calculated=torch.tensor(0))
        layer2 = torch_compressor.compressor.LayerInfo('bn2', model.bn2)
        mask2 = pruner.calc_mask(layer2, config_list[0], if_calculated=torch.tensor(0))
        expected = np.array([0., 0., 0., 1., 1.])
        assert all(mask1['weight'].numpy() == expected)
        assert all(mask2['weight'].numpy() == expected)
        assert all(mask1['bias'].numpy() == expected)
        assert all(mask2['bias'].numpy() == expected)

    def test_torch_QAT_quantizer(self):
        model = TorchModel()
        model.relu = torch.nn.ReLU()
        config_list = self._qat_config()
        quantizer = torch_compressor.QAT_Quantizer(model, config_list)
        quantizer.compress()
        eps = 1e-7

        # Weight quantization with a range not including 0: zero point is 0.
        weight = torch.tensor([[1, 2], [3, 5]]).float()
        quantizer.quantize_weight(weight, config_list[0], model.conv2)
        assert math.isclose(model.conv2.scale, 5 / 255, abs_tol=eps)
        assert model.conv2.zero_point == 0

        # Range including 0 ([-1, 5]): zero point lands near the middle.
        weight = torch.tensor([[-1, 2], [3, 5]]).float()
        quantizer.quantize_weight(weight, config_list[0], model.conv2)
        assert math.isclose(model.conv2.scale, 6 / 255, abs_tol=eps)
        assert model.conv2.zero_point in (42, 43)

        # Output quantization keeps a biased EMA of the activation range.
        out = model.relu(torch.tensor([[-0.2, 0], [0.1, 0.2]]))
        assert math.isclose(model.relu.module.tracked_min_biased, 0, abs_tol=eps)
        assert math.isclose(model.relu.module.tracked_max_biased, 0.002, abs_tol=eps)

        quantizer.step()
        out = model.relu(torch.tensor([[0.2, 0.4], [0.6, 0.8]]))
        assert math.isclose(model.relu.module.tracked_min_biased, 0.002, abs_tol=eps)
        assert math.isclose(model.relu.module.tracked_max_biased, 0.00998, abs_tol=eps)
277

Tang Lang's avatar
Tang Lang committed
278

279
280
if __name__ == '__main__':
    # Run the unittest CLI when executed as a script.
    main()