test_compressor.py 11.6 KB
Newer Older
1
from unittest import TestCase, main
2
import numpy as np
3
import tensorflow as tf
4
5
import torch
import torch.nn.functional as F
6
import nni.compression.torch as torch_compressor
7
import math
8

9
10
11
if tf.__version__ >= '2.0':
    import nni.compression.tensorflow as tf_compressor

Tang Lang's avatar
Tang Lang committed
12

13
def get_tf_model():
14
    model = tf.keras.models.Sequential([
15
        tf.keras.layers.Conv2D(filters=5, kernel_size=7, input_shape=[28, 28, 1], activation='relu', padding="SAME"),
16
        tf.keras.layers.MaxPooling2D(pool_size=2),
17
        tf.keras.layers.Conv2D(filters=10, kernel_size=3, activation='relu', padding="SAME"),
18
19
20
21
22
23
24
        tf.keras.layers.MaxPooling2D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=128, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(units=10, activation='softmax'),
    ])
    model.compile(loss="sparse_categorical_crossentropy",
Tang Lang's avatar
Tang Lang committed
25
26
                  optimizer=tf.keras.optimizers.SGD(lr=1e-3),
                  metrics=["accuracy"])
27
    return model
28

Tang Lang's avatar
Tang Lang committed
29

30
class TorchModel(torch.nn.Module):
31
32
    def __init__(self):
        super().__init__()
33
        self.conv1 = torch.nn.Conv2d(1, 5, 5, 1)
Tang Lang's avatar
Tang Lang committed
34
        self.bn1 = torch.nn.BatchNorm2d(5)
35
        self.conv2 = torch.nn.Conv2d(5, 10, 5, 1)
Tang Lang's avatar
Tang Lang committed
36
        self.bn2 = torch.nn.BatchNorm2d(10)
37
38
        self.fc1 = torch.nn.Linear(4 * 4 * 10, 100)
        self.fc2 = torch.nn.Linear(100, 10)
39
40

    def forward(self, x):
Tang Lang's avatar
Tang Lang committed
41
        x = F.relu(self.bn1(self.conv1(x)))
42
        x = F.max_pool2d(x, 2, 2)
Tang Lang's avatar
Tang Lang committed
43
        x = F.relu(self.bn2(self.conv2(x)))
44
        x = F.max_pool2d(x, 2, 2)
45
        x = x.view(-1, 4 * 4 * 10)
46
47
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
48
49
        return F.log_softmax(x, dim=1)

Tang Lang's avatar
Tang Lang committed
50

51
def tf2(func):
52
    def test_tf2_func(*args):
53
        if tf.__version__ >= '2.0':
54
            func(*args)
Tang Lang's avatar
Tang Lang committed
55

56
    return test_tf2_func
57

chicm-ms's avatar
chicm-ms committed
58
59
# for fpgm filter pruner test
w = np.array([[[[i+1]*3]*3]*5 for i in range(10)])
60

Tang Lang's avatar
Tang Lang committed
61

62
class CompressorTestCase(TestCase):
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
    def test_torch_quantizer_modules_detection(self):
        # test if modules can be detected
        model = TorchModel()
        config_list = [{
            'quant_types': ['weight'],
            'quant_bits': 8,
            'op_types':['Conv2d', 'Linear']
        }, {
            'quant_types': ['output'],
            'quant_bits': 8,
            'quant_start_step': 0,
            'op_types':['ReLU']
        }]

        model.relu = torch.nn.ReLU()
        quantizer = torch_compressor.QAT_Quantizer(model, config_list)
        quantizer.compress()
        modules_to_compress = quantizer.get_modules_to_compress()
        modules_to_compress_name = [ t[0].name for t in modules_to_compress]
        assert "conv1" in modules_to_compress_name
        assert "conv2" in modules_to_compress_name
        assert "fc1" in modules_to_compress_name
        assert "fc2" in modules_to_compress_name
        assert "relu" in modules_to_compress_name
        assert len(modules_to_compress_name) == 5

89
90
    def test_torch_level_pruner(self):
        model = TorchModel()
chicm-ms's avatar
chicm-ms committed
91
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
QuanluZhang's avatar
QuanluZhang committed
92
        torch_compressor.LevelPruner(model, configure_list).compress()
93

94
95
96
97
    @tf2
    def test_tf_level_pruner(self):
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
        tf_compressor.LevelPruner(get_tf_model(), configure_list).compress()
98

99
100
    def test_torch_naive_quantizer(self):
        model = TorchModel()
Cjkkkk's avatar
Cjkkkk committed
101
102
103
104
105
        configure_list = [{
            'quant_types': ['weight'],
            'quant_bits': {
                'weight': 8,
            },
Tang Lang's avatar
Tang Lang committed
106
            'op_types': ['Conv2d', 'Linear']
Cjkkkk's avatar
Cjkkkk committed
107
108
        }]
        torch_compressor.NaiveQuantizer(model, configure_list).compress()
109

110
    @tf2
111
112
    def test_tf_naive_quantizer(self):
        tf_compressor.NaiveQuantizer(get_tf_model(), [{'op_types': ['default']}]).compress()
113

114
115
    def test_torch_fpgm_pruner(self):
        """
chicm-ms's avatar
chicm-ms committed
116
        With filters(kernels) weights defined as above (w), it is obvious that w[4] and w[5] is the Geometric Median
117
118
119
120
        which minimize the total geometric distance by defination of Geometric Median in this paper:
        Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration,
        https://arxiv.org/pdf/1811.00250.pdf

chicm-ms's avatar
chicm-ms committed
121
122
        So if sparsity is 0.2, the expected masks should mask out w[4] and w[5], this can be verified through:
        `all(torch.sum(masks, (1, 2, 3)).numpy() == np.array([45., 45., 45., 45., 0., 0., 45., 45., 45., 45.]))`
123

chicm-ms's avatar
chicm-ms committed
124
125
        If sparsity is 0.6, the expected masks should mask out w[2] - w[7], this can be verified through:
        `all(torch.sum(masks, (1, 2, 3)).numpy() == np.array([45., 45., 0., 0., 0., 0., 0., 0., 45., 45.]))`
126
127
128
129
130
131
132
133
134
        """

        model = TorchModel()
        config_list = [{'sparsity': 0.2, 'op_types': ['Conv2d']}, {'sparsity': 0.6, 'op_types': ['Conv2d']}]
        pruner = torch_compressor.FPGMPruner(model, config_list)

        model.conv2.weight.data = torch.tensor(w).float()
        layer = torch_compressor.compressor.LayerInfo('conv2', model.conv2)
        masks = pruner.calc_mask(layer, config_list[0])
chicm-ms's avatar
chicm-ms committed
135
        assert all(torch.sum(masks, (1, 2, 3)).numpy() == np.array([45., 45., 45., 45., 0., 0., 45., 45., 45., 45.]))
136
137
138
139

        pruner.update_epoch(1)
        model.conv2.weight.data = torch.tensor(w).float()
        masks = pruner.calc_mask(layer, config_list[1])
chicm-ms's avatar
chicm-ms committed
140
        assert all(torch.sum(masks, (1, 2, 3)).numpy() == np.array([45., 45., 0., 0., 0., 0., 0., 0., 45., 45.]))
141
142
143

    @tf2
    def test_tf_fpgm_pruner(self):
144
145
146
147
148
149
150
151
152
153
        model = get_tf_model()
        config_list = [{'sparsity': 0.2, 'op_types': ['Conv2D']}, {'sparsity': 0.6, 'op_types': ['Conv2D']}]

        pruner = tf_compressor.FPGMPruner(model, config_list)
        weights = model.layers[2].weights
        weights[0] = np.array(w).astype(np.float32).transpose([2, 3, 0, 1]).transpose([0, 1, 3, 2])
        model.layers[2].set_weights([weights[0], weights[1].numpy()])

        layer = tf_compressor.compressor.LayerInfo(model.layers[2])
        masks = pruner.calc_mask(layer, config_list[0]).numpy()
chicm-ms's avatar
chicm-ms committed
154
        masks = masks.reshape((-1, masks.shape[-1])).transpose([1, 0])
155

chicm-ms's avatar
chicm-ms committed
156
        assert all(masks.sum((1)) == np.array([45., 45., 45., 45., 0., 0., 45., 45., 45., 45.]))
157
158
159
160

        pruner.update_epoch(1)
        model.layers[2].set_weights([weights[0], weights[1].numpy()])
        masks = pruner.calc_mask(layer, config_list[1]).numpy()
chicm-ms's avatar
chicm-ms committed
161
162
        masks = masks.reshape((-1, masks.shape[-1])).transpose([1, 0])
        assert all(masks.sum((1)) == np.array([45., 45., 0., 0., 0., 0., 0., 0., 45., 45.]))
163

Tang Lang's avatar
Tang Lang committed
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
    def test_torch_l1filter_pruner(self):
        """
        Filters with the minimum sum of the weights' L1 norm are pruned in this paper:
        PRUNING FILTERS FOR EFFICIENT CONVNETS,
        https://arxiv.org/abs/1608.08710

        So if sparsity is 0.2, the expected masks should mask out filter 0, this can be verified through:
        `all(torch.sum(mask1, (1, 2, 3)).numpy() == np.array([0., 27., 27., 27., 27.]))`

        If sparsity is 0.6, the expected masks should mask out filter 0,1,2, this can be verified through:
        `all(torch.sum(mask2, (1, 2, 3)).numpy() == np.array([0., 0., 0., 27., 27.]))`
        """
        w = np.array([np.zeros((3, 3, 3)), np.ones((3, 3, 3)), np.ones((3, 3, 3)) * 2,
                      np.ones((3, 3, 3)) * 3, np.ones((3, 3, 3)) * 4])
        model = TorchModel()
        config_list = [{'sparsity': 0.2, 'op_names': ['conv1']}, {'sparsity': 0.6, 'op_names': ['conv2']}]
        pruner = torch_compressor.L1FilterPruner(model, config_list)

        model.conv1.weight.data = torch.tensor(w).float()
        model.conv2.weight.data = torch.tensor(w).float()
        layer1 = torch_compressor.compressor.LayerInfo('conv1', model.conv1)
        mask1 = pruner.calc_mask(layer1, config_list[0])
        layer2 = torch_compressor.compressor.LayerInfo('conv2', model.conv2)
        mask2 = pruner.calc_mask(layer2, config_list[1])
        assert all(torch.sum(mask1, (1, 2, 3)).numpy() == np.array([0., 27., 27., 27., 27.]))
        assert all(torch.sum(mask2, (1, 2, 3)).numpy() == np.array([0., 0., 0., 27., 27.]))

    def test_torch_slim_pruner(self):
        """
        Scale factors with minimum l1 norm in the BN layers are pruned in this paper:
        Learning Efficient Convolutional Networks through Network Slimming,
        https://arxiv.org/pdf/1708.06519.pdf

        So if sparsity is 0.2, the expected masks should mask out channel 0, this can be verified through:
        `all(mask1.numpy() == np.array([0., 1., 1., 1., 1.]))`
        `all(mask2.numpy() == np.array([0., 1., 1., 1., 1.]))`

        If sparsity is 0.6, the expected masks should mask out channel 0,1,2, this can be verified through:
        `all(mask1.numpy() == np.array([0., 0., 0., 1., 1.]))`
        `all(mask2.numpy() == np.array([0., 0., 0., 1., 1.]))`
        """
        w = np.array([0, 1, 2, 3, 4])
        model = TorchModel()
        config_list = [{'sparsity': 0.2, 'op_types': ['BatchNorm2d']}]
        model.bn1.weight.data = torch.tensor(w).float()
        model.bn2.weight.data = torch.tensor(-w).float()
        pruner = torch_compressor.SlimPruner(model, config_list)

        layer1 = torch_compressor.compressor.LayerInfo('bn1', model.bn1)
        mask1 = pruner.calc_mask(layer1, config_list[0])
        layer2 = torch_compressor.compressor.LayerInfo('bn2', model.bn2)
        mask2 = pruner.calc_mask(layer2, config_list[0])
        assert all(mask1.numpy() == np.array([0., 1., 1., 1., 1.]))
        assert all(mask2.numpy() == np.array([0., 1., 1., 1., 1.]))

        config_list = [{'sparsity': 0.6, 'op_types': ['BatchNorm2d']}]
        model.bn1.weight.data = torch.tensor(w).float()
        model.bn2.weight.data = torch.tensor(w).float()
        pruner = torch_compressor.SlimPruner(model, config_list)

        layer1 = torch_compressor.compressor.LayerInfo('bn1', model.bn1)
        mask1 = pruner.calc_mask(layer1, config_list[0])
        layer2 = torch_compressor.compressor.LayerInfo('bn2', model.bn2)
        mask2 = pruner.calc_mask(layer2, config_list[0])
        assert all(mask1.numpy() == np.array([0., 0., 0., 1., 1.]))
        assert all(mask2.numpy() == np.array([0., 0., 0., 1., 1.]))

231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
    def test_torch_QAT_quantizer(self):
        model = TorchModel()
        config_list = [{
            'quant_types': ['weight'],
            'quant_bits': 8,
            'op_types':['Conv2d', 'Linear']
        }, {
            'quant_types': ['output'],
            'quant_bits': 8,
            'quant_start_step': 0,
            'op_types':['ReLU']
        }]
        model.relu = torch.nn.ReLU()
        quantizer = torch_compressor.QAT_Quantizer(model, config_list)
        quantizer.compress()
        # test quantize
        # range not including 0
        eps = 1e-7
        weight = torch.tensor([[1, 2], [3, 5]]).float()
        quantize_weight = quantizer.quantize_weight(weight, config_list[0], model.conv2)
        assert math.isclose(model.conv2.scale, 5 / 255, abs_tol=eps)
        assert model.conv2.zero_point == 0
         # range including 0
        weight = torch.tensor([[-1, 2], [3, 5]]).float()
        quantize_weight = quantizer.quantize_weight(weight, config_list[0], model.conv2)
        assert math.isclose(model.conv2.scale, 6 / 255, abs_tol=eps)
        assert model.conv2.zero_point == 42

        # test ema
        x = torch.tensor([[-0.2, 0], [0.1, 0.2]])
        out = model.relu(x)
        assert math.isclose(model.relu.tracked_min_biased, 0, abs_tol=eps)
        assert math.isclose(model.relu.tracked_max_biased, 0.002, abs_tol=eps)

        quantizer.step()
        x = torch.tensor([[0.2, 0.4], [0.6, 0.8]])
        out = model.relu(x)
        assert math.isclose(model.relu.tracked_min_biased, 0.002, abs_tol=eps)
        assert math.isclose(model.relu.tracked_max_biased, 0.00998, abs_tol=eps)

271
272
if __name__ == '__main__':
    main()