import sys
from common_utils import TestCase, map_nested_tensor_object, freeze_rng_state, set_rng_seed, IN_CIRCLE_CI
from collections import OrderedDict
from itertools import product
import functools
import operator
import torch
import torch.nn as nn
from torchvision import models
import unittest
import warnings

import pytest


def get_available_classification_models():
    # TODO add a registration mechanism to torchvision.models
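    # model builder functions are lowercase public callables; classes such as
    # ResNet start with an uppercase letter and are filtered out below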
    return [k for k, v in models.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]


def get_available_segmentation_models():
    # TODO add a registration mechanism to torchvision.models
    return [k for k, v in models.segmentation.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]


def get_available_detection_models():
    # TODO add a registration mechanism to torchvision.models
    return [k for k, v in models.detection.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]


def get_available_video_models():
    # TODO add a registration mechanism to torchvision.models
    return [k for k, v in models.video.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]


# If 'unwrapper' is provided it will be called with the script model outputs
# before they are compared to the eager model outputs. This is useful when
# the model outputs differ between TorchScript and eager mode.
script_model_unwrapper = {
    'googlenet': lambda x: x.logits,
    'inception_v3': lambda x: x.logits,
    "fasterrcnn_resnet50_fpn": lambda x: x[1],
    "fasterrcnn_mobilenet_v3_large_fpn": lambda x: x[1],
    "fasterrcnn_mobilenet_v3_large_320_fpn": lambda x: x[1],
    "maskrcnn_resnet50_fpn": lambda x: x[1],
    "keypointrcnn_resnet50_fpn": lambda x: x[1],
    "retinanet_resnet50_fpn": lambda x: x[1],
    "ssd300_vgg16": lambda x: x[1],
    "ssdlite320_mobilenet_v3_large": lambda x: x[1],
}


# The following models exhibit flaky numerics under autocast in _test_*_model harnesses.
# This may be caused by the harness environment (e.g. num classes, input initialization
# via torch.rand), and does not prove autocast is unsuitable when training with real data
# (autocast has been used successfully with real data for some of these models).
# TODO: investigate why autocast numerics are flaky in the harnesses.
#
# For the following models, _test_*_model harnesses skip numerical checks on outputs when
# trying autocast. However, they still try an autocasted forward pass, so they still ensure
# autocast coverage suffices to prevent dtype errors in each model.
autocast_flaky_numerics = (
    "inception_v3",
    "resnet101",
    "resnet152",
    "wide_resnet101_2",
    "deeplabv3_resnet50",
    "deeplabv3_resnet101",
    "deeplabv3_mobilenet_v3_large",
    "fcn_resnet50",
    "fcn_resnet101",
    "lraspp_mobilenet_v3_large",
    "maskrcnn_resnet50_fpn",
)


class ModelTester(TestCase):
    def _test_classification_model(self, name, input_shape, dev):
        set_rng_seed(0)
        # passing num_classes other than the default 1000 makes the test stricter
        model = models.__dict__[name](num_classes=50)
        model.eval().to(device=dev)
        # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
        x = torch.rand(input_shape).to(device=dev)
        out = model(x)
        self.assertExpected(out.cpu(), name, prec=0.1)
        self.assertEqual(out.shape[-1], 50)
        self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))

        if dev == torch.device("cuda"):
            with torch.cuda.amp.autocast():
                out = model(x)
                # See autocast_flaky_numerics comment at top of file.
                if name not in autocast_flaky_numerics:
                    self.assertExpected(out.cpu(), name, prec=0.1)
                self.assertEqual(out.shape[-1], 50)

    def _test_segmentation_model(self, name, dev):
        set_rng_seed(0)
        # passing num_classes other than the default 21 keeps the expected file smaller
        model = models.segmentation.__dict__[name](num_classes=10, pretrained_backbone=False)
        model.eval().to(device=dev)
        input_shape = (1, 3, 32, 32)
        # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
        x = torch.rand(input_shape).to(device=dev)
        out = model(x)["out"]

        def check_out(out):
            prec = 0.01
            try:
                # We first try to assert the entire output if possible. This is not
                # only the best way to assert results but also handles the cases
                # where we need to create a new expected result.
                self.assertExpected(out.cpu(), name, prec=prec)
            except AssertionError:
                # Unfortunately some segmentation models are flaky with autocast
                # so instead of validating the probability scores, check that the class
                # predictions match.
                expected_file = self._get_expected_file(name)
                expected = torch.load(expected_file)
                self.assertEqual(out.argmax(dim=1), expected.argmax(dim=1), prec=prec)
                return False  # Partial validation performed

            return True  # Full validation performed

        full_validation = check_out(out)

        self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))

        if dev == torch.device("cuda"):
            with torch.cuda.amp.autocast():
                out = model(x)["out"]
                # See autocast_flaky_numerics comment at top of file.
                if name not in autocast_flaky_numerics:
                    full_validation &= check_out(out)

        if not full_validation:
            msg = "The output of {} could only be partially validated. " \
                  "This is likely due to unit-test flakiness, but you may " \
                  "want to do additional manual checks if you made " \
                  "significant changes to the codebase.".format(self._testMethodName)
            warnings.warn(msg, RuntimeWarning)
            raise unittest.SkipTest(msg)

    def _test_detection_model(self, name, dev):
        set_rng_seed(0)
        kwargs = {}
        if "retinanet" in name:
            # Reduce the default threshold to ensure the returned boxes are not empty.
            kwargs["score_thresh"] = 0.01
        elif "fasterrcnn_mobilenet_v3_large" in name:
            kwargs["box_score_thresh"] = 0.02076
            if "fasterrcnn_mobilenet_v3_large_320_fpn" in name:
                kwargs["rpn_pre_nms_top_n_test"] = 1000
                kwargs["rpn_post_nms_top_n_test"] = 1000
        model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False, **kwargs)
        model.eval().to(device=dev)
        input_shape = (3, 300, 300)
        # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
        x = torch.rand(input_shape).to(device=dev)
        model_input = [x]
        out = model(model_input)
        self.assertIs(model_input[0], x)

        def check_out(out):
            self.assertEqual(len(out), 1)

            def compact(tensor):
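                # summarize large per-sample outputs by their mean/std and
                # subsample small ones, so the serialized expected files stay small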
                size = tensor.size()
                elements_per_sample = functools.reduce(operator.mul, size[1:], 1)
                if elements_per_sample > 30:
                    return compute_mean_std(tensor)
                else:
                    return subsample_tensor(tensor)

            def subsample_tensor(tensor):
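                # keep at most num_samples elements, taken at a fixed stride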
                num_elems = tensor.size(0)
                num_samples = 20
                if num_elems <= num_samples:
                    return tensor

                ith_index = num_elems // num_samples
                return tensor[ith_index - 1::ith_index]

            def compute_mean_std(tensor):
                # can't compute mean of integral tensor
                tensor = tensor.to(torch.double)
                mean = torch.mean(tensor)
                std = torch.std(tensor)
                return {"mean": mean, "std": std}

            output = map_nested_tensor_object(out, tensor_map_fn=compact)
            prec = 0.01
            try:
                # We first try to assert the entire output if possible. This is not
                # only the best way to assert results but also handles the cases
                # where we need to create a new expected result.
                self.assertExpected(output, name, prec=prec)
            except AssertionError:
                # Unfortunately detection models are flaky due to the unstable sort
                # in NMS. If matching across all outputs fails, use the same approach
                # as in NMSTester.test_nms_cuda to see if this is caused by duplicate
                # scores.
                expected_file = self._get_expected_file(name)
                expected = torch.load(expected_file)
                self.assertEqual(output[0]["scores"], expected[0]["scores"], prec=prec)

                # Note: Fmassa proposed turning off NMS by adapting the threshold
                # and then using the Hungarian algorithm as in DETR to find the
                # best match between output and expected boxes and eliminate some
                # of the flakiness. Worth exploring.
                return False  # Partial validation performed

            return True  # Full validation performed

        full_validation = check_out(out)
        self.check_jit_scriptable(model, ([x],), unwrapper=script_model_unwrapper.get(name, None))

        if dev == torch.device("cuda"):
            with torch.cuda.amp.autocast():
                out = model(model_input)
                # See autocast_flaky_numerics comment at top of file.
                if name not in autocast_flaky_numerics:
                    full_validation &= check_out(out)

        if not full_validation:
            msg = "The output of {} could only be partially validated. " \
                  "This is likely due to unit-test flakiness, but you may " \
                  "want to do additional manual checks if you made " \
                  "significant changes to the codebase.".format(self._testMethodName)
            warnings.warn(msg, RuntimeWarning)
            raise unittest.SkipTest(msg)

    def _test_detection_model_validation(self, name):
        set_rng_seed(0)
        model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False)
        input_shape = (3, 300, 300)
        x = [torch.rand(input_shape)]

        # validate that targets are present in training
        self.assertRaises(ValueError, model, x)

        # validate type
        targets = [{'boxes': 0.}]
        self.assertRaises(ValueError, model, x, targets=targets)

        # validate boxes shape
        for boxes in (torch.rand((4,)), torch.rand((1, 5))):
            targets = [{'boxes': boxes}]
            self.assertRaises(ValueError, model, x, targets=targets)

        # validate that no degenerate boxes are present
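        # (the first box below has zero width, the second zero height)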
        boxes = torch.tensor([[1, 3, 1, 4], [2, 4, 3, 4]])
        targets = [{'boxes': boxes}]
        self.assertRaises(ValueError, model, x, targets=targets)

    def _test_video_model(self, name, dev):
        # the default input shape is bs * num_channels * clip_len * h * w
        input_shape = (1, 3, 4, 112, 112)
        # test both BasicBlock and Bottleneck
        model = models.video.__dict__[name](num_classes=50)
        model.eval().to(device=dev)
        # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
        x = torch.rand(input_shape).to(device=dev)
        out = model(x)
        self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))
        self.assertEqual(out.shape[-1], 50)

        if dev == torch.device("cuda"):
            with torch.cuda.amp.autocast():
                out = model(x)
                self.assertEqual(out.shape[-1], 50)

    def _make_sliced_model(self, model, stop_layer):
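        # copy the model's children, in order, up to and including stop_layer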
        layers = OrderedDict()
        for name, layer in model.named_children():
            layers[name] = layer
            if name == stop_layer:
                break
        new_model = torch.nn.Sequential(layers)
        return new_model

    def test_memory_efficient_densenet(self):
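        # memory_efficient=True trades compute for memory (via checkpointing);
        # it should not change the numerical result, so compare against the
        # default implementation on the same weights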
        input_shape = (1, 3, 300, 300)
        x = torch.rand(input_shape)

        for name in ['densenet121', 'densenet169', 'densenet201', 'densenet161']:
            model1 = models.__dict__[name](num_classes=50, memory_efficient=True)
            params = model1.state_dict()
            num_params = sum([x.numel() for x in model1.parameters()])
            model1.eval()
            out1 = model1(x)
            out1.sum().backward()
            num_grad = sum([x.grad.numel() for x in model1.parameters() if x.grad is not None])

            model2 = models.__dict__[name](num_classes=50, memory_efficient=False)
            model2.load_state_dict(params)
            model2.eval()
            out2 = model2(x)

            max_diff = (out1 - out2).abs().max()

            self.assertEqual(num_params, num_grad)
            self.assertLess(max_diff, 1e-5)

    def test_resnet_dilation(self):
        # TODO improve tests to also check that each layer has the right dimensionality
        for i in product([False, True], [False, True], [False, True]):
            model = models.__dict__["resnet50"](replace_stride_with_dilation=i)
            model = self._make_sliced_model(model, stop_layer="layer4")
            model.eval()
            x = torch.rand(1, 3, 224, 224)
            out = model(x)
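            # each stage whose stride is replaced by dilation keeps its spatial
            # resolution, so the baseline 7x7 output doubles per replaced stride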
            f = 2 ** sum(i)
            self.assertEqual(out.shape, (1, 2048, 7 * f, 7 * f))

    def test_mobilenet_v2_residual_setting(self):
        model = models.__dict__["mobilenet_v2"](inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]])
        model.eval()
        x = torch.rand(1, 3, 224, 224)
        out = model(x)
        self.assertEqual(out.shape[-1], 1000)

    def test_mobilenet_norm_layer(self):
        for name in ["mobilenet_v2", "mobilenet_v3_large", "mobilenet_v3_small"]:
            model = models.__dict__[name]()
            self.assertTrue(any(isinstance(x, nn.BatchNorm2d) for x in model.modules()))

            def get_gn(num_channels):
                return nn.GroupNorm(32, num_channels)

            model = models.__dict__[name](norm_layer=get_gn)
            self.assertFalse(any(isinstance(x, nn.BatchNorm2d) for x in model.modules()))
            self.assertTrue(any(isinstance(x, nn.GroupNorm) for x in model.modules()))

    def test_inception_v3_eval(self):
        # replacement for models.inception_v3(pretrained=True) that does not download weights
        kwargs = {}
        kwargs['transform_input'] = True
        kwargs['aux_logits'] = True
        kwargs['init_weights'] = False
        name = "inception_v3"
        model = models.Inception3(**kwargs)
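        # mimic the pretrained loading path: build with the aux branch present,
        # then drop it so only the main classifier runs in eval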
        model.aux_logits = False
        model.AuxLogits = None
        model = model.eval()
        x = torch.rand(1, 3, 299, 299)
        self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))

    def test_fasterrcnn_double(self):
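        # .double() casts all parameters and buffers to float64; the whole
        # detection pipeline should run end to end in that dtype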
        model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False)
        model.double()
        model.eval()
        input_shape = (3, 300, 300)
        x = torch.rand(input_shape, dtype=torch.float64)
        model_input = [x]
        out = model(model_input)
        self.assertIs(model_input[0], x)
        self.assertEqual(len(out), 1)
        self.assertTrue("boxes" in out[0])
        self.assertTrue("scores" in out[0])
        self.assertTrue("labels" in out[0])

    def test_googlenet_eval(self):
        # replacement for models.googlenet(pretrained=True) that does not download weights
        kwargs = {}
        kwargs['transform_input'] = True
        kwargs['aux_logits'] = True
        kwargs['init_weights'] = False
        name = "googlenet"
        model = models.GoogLeNet(**kwargs)
        model.aux_logits = False
        model.aux1 = None
        model.aux2 = None
        model = model.eval()
        x = torch.rand(1, 3, 224, 224)
        self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))

    @unittest.skipIf(not torch.cuda.is_available(), 'needs GPU')
    def test_fasterrcnn_switch_devices(self):
        def checkOut(out):
            self.assertEqual(len(out), 1)
            self.assertTrue("boxes" in out[0])
            self.assertTrue("scores" in out[0])
            self.assertTrue("labels" in out[0])

        model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False)
        model.cuda()
        model.eval()
        input_shape = (3, 300, 300)
        x = torch.rand(input_shape, device='cuda')
        model_input = [x]
        out = model(model_input)
        self.assertIs(model_input[0], x)

        checkOut(out)

        with torch.cuda.amp.autocast():
            out = model(model_input)

        checkOut(out)

        # now switch to cpu and make sure it works
        model.cpu()
        x = x.cpu()
        out_cpu = model([x])

        checkOut(out_cpu)

    def test_generalizedrcnn_transform_repr(self):

        min_size, max_size = 224, 299
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]

        t = models.detection.transform.GeneralizedRCNNTransform(min_size=min_size,
                                                                max_size=max_size,
                                                                image_mean=image_mean,
                                                                image_std=image_std)

        # Check integrity of object __repr__ attribute
        expected_string = 'GeneralizedRCNNTransform('
        _indent = '\n    '
        expected_string += '{0}Normalize(mean={1}, std={2})'.format(_indent, image_mean, image_std)
        expected_string += '{0}Resize(min_size=({1},), max_size={2}, '.format(_indent, min_size, max_size)
        expected_string += "mode='bilinear')\n)"
        self.assertEqual(repr(t), expected_string)


_devs = [torch.device("cpu"), torch.device("cuda")] if torch.cuda.is_available() else [torch.device("cpu")]


@pytest.mark.parametrize('model_name', get_available_classification_models())
@pytest.mark.parametrize('dev', _devs)
def test_classification_model(model_name, dev):
    input_shape = (1, 3, 299, 299) if model_name == 'inception_v3' else (1, 3, 224, 224)
    ModelTester()._test_classification_model(model_name, input_shape, dev)


@pytest.mark.parametrize('model_name', get_available_segmentation_models())
@pytest.mark.parametrize('dev', _devs)
def test_segmentation_model(model_name, dev):
    ModelTester()._test_segmentation_model(model_name, dev)


@pytest.mark.parametrize('model_name', get_available_detection_models())
@pytest.mark.parametrize('dev', _devs)
def test_detection_model(model_name, dev):
    ModelTester()._test_detection_model(model_name, dev)


@pytest.mark.parametrize('model_name', get_available_detection_models())
def test_detection_model_validation(model_name):
    ModelTester()._test_detection_model_validation(model_name)


@pytest.mark.parametrize('model_name', get_available_video_models())
@pytest.mark.parametrize('dev', _devs)
def test_video_model(model_name, dev):
    if IN_CIRCLE_CI and 'cuda' in dev.type and model_name == 'r2plus1d_18' and sys.platform == 'linux':
        # FIXME: Failure should be fixed and test re-activated. See https://github.com/pytorch/vision/issues/3702
        pytest.skip('r2plus1d_18 fails on CircleCI linux GPU machines.')
    ModelTester()._test_video_model(model_name, dev)


if __name__ == '__main__':
    pytest.main([__file__])