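"""Tests for torchvision.ops.

Covers the RoI operators (RoIPool, PSRoIPool, RoIAlign, PSRoIAlign) and their
quantized/scripted variants, NMS and batched NMS, deformable convolution,
FrozenBatchNorm2d, box conversion and IoU utilities, masks_to_boxes, and
StochasticDepth. Each RoI operator is checked against a slow, pure-Python
reference implementation (``expected_fn``) on CPU and, when available, on GPU.
"""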
import math
import os
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import Tuple

import numpy as np
import pytest
import torch
from common_utils import needs_cuda, cpu_and_gpu, assert_equal
from PIL import Image
from torch import nn, Tensor
from torch.autograd import gradcheck
from torch.nn.modules.utils import _pair
from torchvision import models, ops


class RoIOpTester(ABC):
    dtype = torch.float64

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs):
        x_dtype = self.dtype if x_dtype is None else x_dtype
        rois_dtype = self.dtype if rois_dtype is None else rois_dtype
        pool_size = 5
        # n_channels % (pool_size ** 2) == 0 is required for the position-sensitive (PS) operations.
        n_channels = 2 * (pool_size ** 2)
        x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor(
            [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]],  # format is (xyxy)
            dtype=rois_dtype,
            device=device,
        )

        pool_h, pool_w = pool_size, pool_size
        y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs)
        # The following should be true whether we're running an autocast test or not.
        assert y.dtype == x.dtype
        gt_y = self.expected_fn(
            x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=device, dtype=self.dtype, **kwargs
        )

        tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
        torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)

    @pytest.mark.parametrize("seed", range(10))
    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_backward(self, seed, device, contiguous):
        torch.random.manual_seed(seed)
        pool_size = 2
        x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor(
            [[0, 0, 0, 4, 4], [0, 0, 2, 3, 4], [0, 2, 2, 4, 4]], dtype=self.dtype, device=device  # format is (xyxy)
        )

        def func(z):
            return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1)

        script_func = self.get_script_fn(rois, pool_size)

        gradcheck(func, (x,))
        gradcheck(script_func, (x,))

    @needs_cuda
    @pytest.mark.parametrize("x_dtype", (torch.float, torch.half))
    @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
    def test_autocast(self, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)

    def _helper_boxes_shape(self, func):
        # test boxes as Tensor[N, 5]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)
            func(a, boxes, output_size=(2, 2))

        # test boxes as List[Tensor[N, 4]]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype)
            ops.roi_pool(a, [boxes], output_size=(2, 2))

    @abstractmethod
    def fn(*args, **kwargs):
        pass

    @abstractmethod
    def get_script_fn(*args, **kwargs):
        pass

    @abstractmethod
    def expected_fn(*args, **kwargs):
        pass


class TestRoiPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")

        n_channels = x.size(1)
        y = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

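        # Reference implementation of max RoI pooling: crop each RoI out of its
        # batch image, split the crop into a pool_h x pool_w grid of bins, and
        # take the per-channel max over each non-empty bin.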
        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1]

            roi_h, roi_w = roi_x.shape[-2:]
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_pool)


class TestPSRoIPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = x.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1]

            roi_height = max(i_end - i_begin, 1)
            roi_width = max(j_end - j_begin, 1)
            bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        area = bin_x.size(-2) * bin_x.size(-1)
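                        # Position-sensitive pooling: output channel c_out of bin (i, j)
                        # averages input channel c_in = c_out * (pool_h * pool_w) + pool_w * i + j,
                        # so each bin reads from its own slice of the input channels.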
                        for c_out in range(0, n_output_channels):
                            c_in = c_out * (pool_h * pool_w) + pool_w * i + j
                            t = torch.sum(bin_x[c_in, :, :])
                            y[roi_idx, c_out, i, j] = t / area
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_pool)


def bilinear_interpolate(data, y, x, snap_border=False):
    height, width = data.shape
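    # Standard bilinear interpolation: the value at a (possibly fractional)
    # position (y, x) is the average of the four surrounding pixels, each
    # weighted by its proximity; corners outside the image contribute nothing.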

    if snap_border:
        if -1 < y <= 0:
            y = 0
        elif height - 1 <= y < height:
            y = height - 1

        if -1 < x <= 0:
            x = 0
        elif width - 1 <= x < width:
            x = width - 1

    y_low = int(math.floor(y))
    x_low = int(math.floor(x))
    y_high = y_low + 1
    x_high = x_low + 1

    wy_h = y - y_low
    wx_h = x - x_low
    wy_l = 1 - wy_h
    wx_l = 1 - wx_h

    val = 0
    for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
        for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
            if 0 <= yp < height and 0 <= xp < width:
                val += wx * wy * data[yp, xp]
    return val


class TestRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs):
        return ops.RoIAlign(
            (pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio, aligned=aligned
        )(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self,
        in_data,
        rois,
        pool_h,
        pool_w,
        spatial_scale=1,
        sampling_ratio=-1,
        aligned=False,
        device=None,
        dtype=torch.float64,
    ):
        if device is None:
            device = torch.device("cpu")
        n_channels = in_data.size(1)
        out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

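        # aligned=True shifts box coordinates by half a pixel so the sampling grid
        # lines up with pixel centers (the "aligned" RoIAlign fix); aligned=False
        # keeps the legacy behavior.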
        offset = 0.5 if aligned else 0.0

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - offset for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
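                    # Each output bin averages grid_h * grid_w bilinearly interpolated
                    # samples taken on a regular grid inside the bin; a non-positive
                    # sampling_ratio makes the grid size adapt to the bin size.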

                    for channel in range(0, n_channels):

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, channel, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, channel, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_align)

    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_forward(self, device, contiguous, aligned, x_dtype=None, rois_dtype=None):
        super().test_forward(
            device=device, contiguous=contiguous, x_dtype=x_dtype, rois_dtype=rois_dtype, aligned=aligned
        )

    @needs_cuda
    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("x_dtype", (torch.float, torch.half))
    @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
    def test_autocast(self, aligned, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(
                torch.device("cuda"), contiguous=False, aligned=aligned, x_dtype=x_dtype, rois_dtype=rois_dtype
            )

    def _make_rois(self, img_size, num_imgs, dtype, num_rois=1000):
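        # Each RoI is a row of [batch_idx, x1, y1, x2, y2] in (xyxy) format.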
        rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype)
        rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,))  # set batch index
        rois[:, 3:] += rois[:, 1:3]  # make sure boxes aren't degenerate
        return rois

    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 10), (0.1, 50)))
    @pytest.mark.parametrize("qdtype", (torch.qint8, torch.quint8, torch.qint32))
    def test_qroialign(self, aligned, scale, zero_point, qdtype):
        """Make sure quantized version of RoIAlign is close to float version"""
        pool_size = 5
        img_size = 10
        n_channels = 2
        num_imgs = 1
        dtype = torch.float

        x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype)

        rois = self._make_rois(img_size, num_imgs, dtype)
        qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype)

        x, rois = qx.dequantize(), qrois.dequantize()  # we want to pass the same inputs

        y = ops.roi_align(
            x,
            rois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )
        qy = ops.roi_align(
            qx,
            qrois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )

        # The output qy is itself a quantized tensor and there might have been a loss of info when it was
        # quantized. For a fair comparison we need to quantize y as well
        quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype)

        try:
            # Ideally, we would assert this, which passes with (scale, zero) == (1, 0)
            assert (qy == quantized_float_y).all()
        except AssertionError:
            # But because the computations aren't exactly the same between the 2 RoIAlign procedures, some
            # rounding error may lead to a difference of 2 in the output.
            # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44
            # but 45.00000001 will be rounded to 46. We make sure below that:
            # - such discrepancies between qy and quantized_float_y are very rare (less than 5%)
            # - any difference between qy and quantized_float_y is == scale
            diff_idx = torch.where(qy != quantized_float_y)
            num_diff = diff_idx[0].numel()
            assert num_diff / qy.numel() < 0.05

            abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize())
            t_scale = torch.full_like(abs_diff, fill_value=scale)
            torch.testing.assert_close(abs_diff, t_scale, rtol=1e-5, atol=1e-5)

    def test_qroi_align_multiple_images(self):
        dtype = torch.float
        x = torch.randint(50, 100, size=(2, 3, 10, 10)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=1, zero_point=0, dtype=torch.qint8)
        rois = self._make_rois(img_size=10, num_imgs=2, dtype=dtype, num_rois=10)
        qrois = torch.quantize_per_tensor(rois, scale=1, zero_point=0, dtype=torch.qint8)
        with pytest.raises(RuntimeError, match="Only one image per batch is allowed"):
            ops.roi_align(qx, qrois, output_size=5)


class TestPSRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, sampling_ratio=-1, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = in_data.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - 0.5 for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
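                    # As in PSRoIPool, each output channel reads from a single,
                    # bin-specific input channel; the bin value is the mean of the
                    # bilinear samples taken on the grid inside the bin.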
                    for c_out in range(0, n_output_channels):
                        c_in = c_out * (pool_h * pool_w) + pool_w * i + j

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, c_in, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, c_out, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_align)


class TestMultiScaleRoIAlign:
    def test_msroialign_repr(self):
        fmap_names = ["0"]
        output_size = (7, 7)
        sampling_ratio = 2
        # Pass mock feature map names
        t = ops.poolers.MultiScaleRoIAlign(fmap_names, output_size, sampling_ratio)

        # Check integrity of object __repr__ attribute
        expected_string = (
            f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, "
            f"sampling_ratio={sampling_ratio})"
        )
        assert repr(t) == expected_string


class TestNMS:
    def _reference_nms(self, boxes, scores, iou_threshold):
        """
        Args:
            boxes (Tensor[N, 4]): boxes in corner (xyxy) form.
            scores (Tensor[N]): per-box scores.
            iou_threshold (float): intersection over union threshold.
        Returns:
            picked: a tensor of indexes of the kept boxes
        """
        picked = []
        _, indexes = scores.sort(descending=True)
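        # Greedy NMS: repeatedly keep the highest-scoring remaining box and
        # discard every other box whose IoU with it exceeds the threshold.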
        while len(indexes) > 0:
            current = indexes[0]
            picked.append(current.item())
            if len(indexes) == 1:
                break
            current_box = boxes[current, :]
            indexes = indexes[1:]
            rest_boxes = boxes[indexes, :]
            iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1)
            indexes = indexes[iou <= iou_threshold]

        return torch.as_tensor(picked)

    def _create_tensors_with_iou(self, N, iou_thresh):
        # force last box to have a pre-defined iou with the first box
        # let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1],
        # then, in order to satisfy ops.iou(b0, b1) == iou_thresh,
        # we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh
        # Adjust the threshold upward a bit with the intent of creating
        # at least one box that exceeds (barely) the threshold and so
        # should be suppressed.
        boxes = torch.rand(N, 4) * 100
        boxes[:, 2:] += boxes[:, :2]
        boxes[-1, :] = boxes[0, :]
        x0, y0, x1, y1 = boxes[-1].tolist()
        iou_thresh += 1e-5
        boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh
        scores = torch.rand(N)
        return boxes, scores

    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    def test_nms_ref(self, iou):
        err_msg = "NMS incompatible between CPU and reference implementation for IoU={}"
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        keep_ref = self._reference_nms(boxes, scores, iou)
        keep = ops.nms(boxes, scores, iou)
        assert torch.allclose(keep, keep_ref), err_msg.format(iou)

    def test_nms_input_errors(self):
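        # Each call violates one input-shape requirement: boxes must be [N, 4],
        # scores must be [N], and the two N's must match.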
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(4), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 5), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(3, 2), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(4), 0.5)

    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 50), (3, 10)))
    def test_qnms(self, iou, scale, zero_point):
        # Note: we compare qnms vs nms instead of qnms vs the reference implementation.
        # This is because with the int conversion, the trick used in _create_tensors_with_iou
        # doesn't really work (in fact, nms vs the reference implementation will also fail with ints)
        err_msg = "NMS and QNMS give different results for IoU={}"
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        scores *= 100  # otherwise most scores would be 0 or 1 after int conversion

        qboxes = torch.quantize_per_tensor(boxes, scale=scale, zero_point=zero_point, dtype=torch.quint8)
        qscores = torch.quantize_per_tensor(scores, scale=scale, zero_point=zero_point, dtype=torch.quint8)

        boxes = qboxes.dequantize()
        scores = qscores.dequantize()

        keep = ops.nms(boxes, scores, iou)
        qkeep = ops.nms(qboxes, qscores, iou)

        assert torch.allclose(qkeep, keep), err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    def test_nms_cuda(self, iou, dtype=torch.float64):
        tol = 1e-3 if dtype is torch.half else 1e-5
        err_msg = "NMS incompatible between CPU and CUDA for IoU={}"

        boxes, scores = self._create_tensors_with_iou(1000, iou)
        r_cpu = ops.nms(boxes, scores, iou)
        r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou)

        is_eq = torch.allclose(r_cpu, r_cuda.cpu())
        if not is_eq:
            # if the indices are not the same, ensure that it's because the scores
            # are duplicate
            is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()], rtol=tol, atol=tol)
        assert is_eq, err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    @pytest.mark.parametrize("dtype", (torch.float, torch.half))
    def test_autocast(self, iou, dtype):
        with torch.cuda.amp.autocast():
            self.test_nms_cuda(iou=iou, dtype=dtype)

    @needs_cuda
    def test_nms_cuda_float16(self):
        boxes = torch.tensor(
            [
                [285.3538, 185.5758, 1193.5110, 851.4551],
                [285.1472, 188.7374, 1192.4984, 851.0669],
                [279.2440, 197.9812, 1189.4746, 849.2019],
            ]
        ).cuda()
        scores = torch.tensor([0.6370, 0.7569, 0.3966]).cuda()

        iou_thres = 0.2
        keep32 = ops.nms(boxes, scores, iou_thres)
        keep16 = ops.nms(boxes.to(torch.float16), scores.to(torch.float16), iou_thres)
        assert_equal(keep32, keep16)

    def test_batched_nms_implementations(self):
        """Make sure that both implementations of batched_nms yield identical results"""

        num_boxes = 1000
        iou_threshold = 0.9

        boxes = torch.cat((torch.rand(num_boxes, 2), torch.rand(num_boxes, 2) + 10), dim=1)
        assert max(boxes[:, 0]) < min(boxes[:, 2])  # x1 < x2
        assert max(boxes[:, 1]) < min(boxes[:, 3])  # y1 < y2

        scores = torch.rand(num_boxes)
        idxs = torch.randint(0, 4, size=(num_boxes,))
        keep_vanilla = ops.boxes._batched_nms_vanilla(boxes, scores, idxs, iou_threshold)
        keep_trick = ops.boxes._batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)

        torch.testing.assert_close(
            keep_vanilla, keep_trick, msg="The vanilla and the trick implementation yield different nms outputs."
        )

        # Also make sure an empty tensor is returned if boxes is empty
        empty = torch.empty((0,), dtype=torch.int64)
        torch.testing.assert_close(empty, ops.batched_nms(empty, None, None, None))


class TestDeformConv:
    dtype = torch.float64

    def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1):
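        # Brute-force reference for (modulated) deformable convolution: every
        # kernel tap samples the input at its regular position plus a learned,
        # per-location offset (via bilinear interpolation), optionally scaled by
        # a modulation mask, before the usual weighted sum over taps.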
        stride_h, stride_w = _pair(stride)
        pad_h, pad_w = _pair(padding)
        dil_h, dil_w = _pair(dilation)
        weight_h, weight_w = weight.shape[-2:]

        n_batches, n_in_channels, in_h, in_w = x.shape
        n_out_channels = weight.shape[0]

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1

        n_offset_grps = offset.shape[1] // (2 * weight_h * weight_w)
        in_c_per_offset_grp = n_in_channels // n_offset_grps

        n_weight_grps = n_in_channels // weight.shape[1]
        in_c_per_weight_grp = weight.shape[1]
        out_c_per_weight_grp = n_out_channels // n_weight_grps

        out = torch.zeros(n_batches, n_out_channels, out_h, out_w, device=x.device, dtype=x.dtype)
        for b in range(n_batches):
            for c_out in range(n_out_channels):
                for i in range(out_h):
                    for j in range(out_w):
                        for di in range(weight_h):
                            for dj in range(weight_w):
                                for c in range(in_c_per_weight_grp):
                                    weight_grp = c_out // out_c_per_weight_grp
                                    c_in = weight_grp * in_c_per_weight_grp + c

                                    offset_grp = c_in // in_c_per_offset_grp
                                    mask_idx = offset_grp * (weight_h * weight_w) + di * weight_w + dj
                                    offset_idx = 2 * mask_idx
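                                    # offset_idx selects the (dy, dx) offset pair for
                                    # this kernel tap: the tap samples at its regular
                                    # grid position shifted by that learned offset.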

                                    pi = stride_h * i - pad_h + dil_h * di + offset[b, offset_idx, i, j]
                                    pj = stride_w * j - pad_w + dil_w * dj + offset[b, offset_idx + 1, i, j]

                                    mask_value = 1.0
                                    if mask is not None:
                                        mask_value = mask[b, mask_idx, i, j]

                                    out[b, c_out, i, j] += (
                                        mask_value
                                        * weight[c_out, c, di, dj]
                                        * bilinear_interpolate(x[b, c_in, :, :], pi, pj)
                                    )
        out += bias.view(1, n_out_channels, 1, 1)
        return out

    @lru_cache(maxsize=None)
    def get_fn_args(self, device, contiguous, batch_sz, dtype):
        n_in_channels = 6
        n_out_channels = 2
        n_weight_grps = 2
        n_offset_grps = 3

        stride = (2, 1)
        pad = (1, 0)
        dilation = (2, 1)

        stride_h, stride_w = stride
        pad_h, pad_w = pad
        dil_h, dil_w = dilation
        weight_h, weight_w = (3, 2)
        in_h, in_w = (5, 4)

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1

        x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=dtype, requires_grad=True)

        offset = torch.randn(
            batch_sz,
            n_offset_grps * 2 * weight_h * weight_w,
            out_h,
            out_w,
            device=device,
            dtype=dtype,
            requires_grad=True,
        )

        mask = torch.randn(
            batch_sz, n_offset_grps * weight_h * weight_w, out_h, out_w, device=device, dtype=dtype, requires_grad=True
        )

        weight = torch.randn(
            n_out_channels,
            n_in_channels // n_weight_grps,
            weight_h,
            weight_w,
            device=device,
            dtype=dtype,
            requires_grad=True,
        )

        bias = torch.randn(n_out_channels, device=device, dtype=dtype, requires_grad=True)

        if not contiguous:
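            # The permute/contiguous/permute round-trips keep values and shapes
            # identical but make the underlying storage non-contiguous.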
            x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)

        return x, weight, offset, mask, bias, stride, pad, dilation

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    @pytest.mark.parametrize("batch_sz", (0, 33))
    def test_forward(self, device, contiguous, batch_sz, dtype=None):
        dtype = dtype or self.dtype
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype)
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        tol = 2e-3 if dtype is torch.half else 1e-5

        layer = ops.DeformConv2d(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups
        ).to(device=x.device, dtype=dtype)
        res = layer(x, offset, mask)

        weight = layer.weight.data
        bias = layer.bias.data
        expected = self.expected_fn(x, weight, offset, mask, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg=f"\nres:\n{res}\nexpected:\n{expected}"
        )

        # no modulation test
        res = layer(x, offset)
        expected = self.expected_fn(x, weight, offset, None, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg=f"\nres:\n{res}\nexpected:\n{expected}"
        )

    def test_wrong_sizes(self):
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(
            "cpu", contiguous=True, batch_sz=10, dtype=self.dtype
        )
        layer = ops.DeformConv2d(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups
        )
        with pytest.raises(RuntimeError, match="the shape of the offset"):
            wrong_offset = torch.rand_like(offset[:, :2])
            layer(x, wrong_offset)

        with pytest.raises(RuntimeError, match=r"mask.shape\[1\] is not valid"):
            wrong_mask = torch.rand_like(mask[:, :2])
            layer(x, offset, wrong_mask)

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    @pytest.mark.parametrize("batch_sz", (0, 33))
    def test_backward(self, device, contiguous, batch_sz):
        x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args(
            device, contiguous, batch_sz, self.dtype
        )

        def func(x_, offset_, mask_, weight_, bias_):
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=mask_
            )

        gradcheck(func, (x, offset, mask, weight, bias), nondet_tol=1e-5, fast_mode=True)

        def func_no_mask(x_, offset_, weight_, bias_):
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=None
            )

        gradcheck(func_no_mask, (x, offset, weight, bias), nondet_tol=1e-5, fast_mode=True)

        @torch.jit.script
        def script_func(x_, offset_, mask_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=mask_
            )

        gradcheck(
            lambda z, off, msk, wei, bi: script_func(z, off, msk, wei, bi, stride, padding, dilation),
            (x, offset, mask, weight, bias),
            nondet_tol=1e-5,
            fast_mode=True,
        )

        @torch.jit.script
        def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=None
            )

        gradcheck(
            lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation),
            (x, offset, weight, bias),
            nondet_tol=1e-5,
            fast_mode=True,
        )

    @needs_cuda
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_compare_cpu_cuda_grads(self, contiguous):
        # Test from https://github.com/pytorch/vision/issues/2598
        # Run on CUDA only

        # compare grads computed on CUDA with grads computed on CPU
        true_cpu_grads = None

        init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
        img = torch.randn(8, 9, 1000, 110)
        offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
        mask = torch.rand(8, 3 * 3, 1000, 110)

        if not contiguous:
            img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
        else:
            weight = init_weight

        for d in ["cpu", "cuda"]:
            out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d))
            out.mean().backward()
            if true_cpu_grads is None:
                true_cpu_grads = init_weight.grad
                assert true_cpu_grads is not None
            else:
                assert init_weight.grad is not None
                res_grads = init_weight.grad.to("cpu")
                torch.testing.assert_close(true_cpu_grads, res_grads)

    @needs_cuda
    @pytest.mark.parametrize("batch_sz", (0, 33))
    @pytest.mark.parametrize("dtype", (torch.float, torch.half))
    def test_autocast(self, batch_sz, dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)

    def test_forward_scriptability(self):
        # Non-regression test for https://github.com/pytorch/vision/issues/4078
        torch.jit.script(ops.DeformConv2d(in_channels=8, out_channels=8, kernel_size=3))


class TestFrozenBNT:
    def test_frozenbatchnorm2d_repr(self):
        num_features = 32
        eps = 1e-5
        t = ops.misc.FrozenBatchNorm2d(num_features, eps=eps)

        # Check integrity of object __repr__ attribute
        expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})"
        assert repr(t) == expected_string

    @pytest.mark.parametrize("seed", range(10))
    def test_frozenbatchnorm2d_eps(self, seed):
        torch.random.manual_seed(seed)
        sample_size = (4, 32, 28, 28)
        x = torch.rand(sample_size)
        state_dict = dict(
            weight=torch.rand(sample_size[1]),
            bias=torch.rand(sample_size[1]),
            running_mean=torch.rand(sample_size[1]),
            running_var=torch.rand(sample_size[1]),
            num_batches_tracked=torch.tensor(100),
        )

        # Check that default eps is equal to the one of BN
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1])
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1]).eval()
        bn.load_state_dict(state_dict)
        # Difference is expected to fall in an acceptable range
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)

        # Check computation for eps > 0
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1], eps=1e-5)
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1], eps=1e-5).eval()
        bn.load_state_dict(state_dict)
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)

    def test_frozenbatchnorm2d_n_arg(self):
        """Ensure a warning is thrown when passing `n` kwarg
        (remove this when support of `n` is dropped)"""
        with pytest.warns(DeprecationWarning):
            ops.misc.FrozenBatchNorm2d(32, eps=1e-5, n=32)


class TestBoxConversion:
    def _get_box_sequences():
        # Define here the argument type of `boxes` supported by region pooling operations
        box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
        box_list = [
            torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
            torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
        ]
        box_tuple = tuple(box_list)
        return box_tensor, box_list, box_tuple

    @pytest.mark.parametrize("box_sequence", _get_box_sequences())
    def test_check_roi_boxes_shape(self, box_sequence):
        # Ensure common sequences of tensors are supported
        ops._utils.check_roi_boxes_shape(box_sequence)

    @pytest.mark.parametrize("box_sequence", _get_box_sequences())
    def test_convert_boxes_to_roi_format(self, box_sequence):
        # Ensure common sequences of tensors yield the same result
        ref_tensor = None
        if ref_tensor is None:
            ref_tensor = ops._utils.convert_boxes_to_roi_format(box_sequence)
        else:
            assert_equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence))


class TestBox:
    def test_bbox_same(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )

        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        assert exp_xyxy.size() == torch.Size([4, 4])
        assert_equal(ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy)

    def test_bbox_xyxy_xywh(self):
        # Simple test to convert boxes to xywh and back. Make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        assert exp_xywh.size() == torch.Size([4, 4])
        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        assert_equal(box_xywh, exp_xywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)

    def test_bbox_xyxy_cxcywh(self):
        # Simple test to convert boxes to cxcywh and back. Make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )
        exp_cxcywh = torch.tensor(
            [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float
        )

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)

    def test_bbox_xywh_cxcywh(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float
        )

        # Expected values computed by hand: cx = x + w / 2, cy = y + h / 2.
        exp_cxcywh = torch.tensor(
            [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float
        )

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        assert_equal(box_xywh, box_tensor)

    @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh"])
    @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy"])
    def test_bbox_invalid(self, inv_infmt, inv_outfmt):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float
        )

        with pytest.raises(ValueError):
            ops.box_convert(box_tensor, inv_infmt, inv_outfmt)

    def test_bbox_convert_jit(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )

        scripted_fn = torch.jit.script(ops.box_convert)
        TOLERANCE = 1e-3

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        scripted_xywh = scripted_fn(box_tensor, "xyxy", "xywh")
        torch.testing.assert_close(scripted_xywh, box_xywh, rtol=0.0, atol=TOLERANCE)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        scripted_cxcywh = scripted_fn(box_tensor, "xyxy", "cxcywh")
        torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)


class TestBoxArea:
    def test_box_area(self):
        def area_check(box, expected, tolerance=1e-4):
            out = ops.box_area(box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int8, torch.int16, torch.int32, torch.int64]:
            box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=dtype)
            expected = torch.tensor([10000, 0])
            area_check(box_tensor, expected)

        # Check for float32 and float64 boxes
        for dtype in [torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([604723.0806, 600965.4666, 592761.0085], dtype=torch.float64)
            area_check(box_tensor, expected, tolerance=0.05)

        # Check for float16 box
        box_tensor = torch.tensor(
            [[285.25, 185.625, 1194.0, 851.5], [285.25, 188.75, 1192.0, 851.0], [279.25, 198.0, 1189.0, 849.0]],
            dtype=torch.float16,
        )
        expected = torch.tensor([605113.875, 600495.1875, 592247.25])
        area_check(box_tensor, expected)

    def test_box_area_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.box_area(box_tensor)
        scripted_fn = torch.jit.script(ops.box_area)
        scripted_area = scripted_fn(box_tensor)
        torch.testing.assert_close(scripted_area, expected, rtol=0.0, atol=TOLERANCE)


class TestBoxIou:
    def test_iou(self):
        def iou_check(box, expected, tolerance=1e-4):
            out = ops.box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]])
            iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-4)

    def test_iou_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.box_iou(box_tensor, box_tensor)
        scripted_fn = torch.jit.script(ops.box_iou)
        scripted_iou = scripted_fn(box_tensor, box_tensor)
        torch.testing.assert_close(scripted_iou, expected, rtol=0.0, atol=TOLERANCE)


class TestGenBoxIou:
    def test_gen_iou(self):
        def gen_iou_check(box, expected, tolerance=1e-4):
            out = ops.generalized_box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0]])
            gen_iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            gen_iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-3)

    def test_giou_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.generalized_box_iou(box_tensor, box_tensor)
        scripted_fn = torch.jit.script(ops.generalized_box_iou)
        scripted_iou = scripted_fn(box_tensor, box_tensor)
        torch.testing.assert_close(scripted_iou, expected, rtol=0.0, atol=TOLERANCE)


class TestMasksToBoxes:
    def test_masks_box(self):
        def masks_box_check(masks, expected, tolerance=1e-4):
            out = ops.masks_to_boxes(masks)
            assert out.dtype == torch.float
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Load the multi-frame mask image shipped in the assets folder.
        def _get_image():
            assets_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
            mask_path = os.path.join(assets_directory, "masks.tiff")
            image = Image.open(mask_path)
            return image

        def _create_masks(image, masks):
            for index in range(image.n_frames):
                image.seek(index)
                frame = np.array(image)
                masks[index] = torch.tensor(frame)

            return masks

        expected = torch.tensor(
            [
                [127, 2, 165, 40],
                [2, 50, 44, 92],
                [56, 63, 98, 100],
                [139, 68, 175, 104],
                [160, 112, 198, 145],
                [49, 138, 99, 182],
                [108, 148, 152, 213],
            ],
            dtype=torch.float,
        )

        image = _get_image()
        for dtype in [torch.float16, torch.float32, torch.float64]:
            masks = torch.zeros((image.n_frames, image.height, image.width), dtype=dtype)
            masks = _create_masks(image, masks)
            masks_box_check(masks, expected)


class TestStochasticDepth:
    @pytest.mark.parametrize("seed", range(10))
    @pytest.mark.parametrize("p", [0.2, 0.5, 0.8])
    @pytest.mark.parametrize("mode", ["batch", "row"])
    def test_stochastic_depth_random(self, seed, mode, p):
        torch.manual_seed(seed)
        stats = pytest.importorskip("scipy.stats")
        batch_size = 5
        x = torch.ones(size=(batch_size, 3, 4, 4))
        layer = ops.StochasticDepth(p=p, mode=mode)
        layer.__repr__()

        trials = 250
        num_samples = 0
        counts = 0
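        # Empirically estimate the drop rate: count how often the layer zeroes out
        # the whole batch ("batch" mode) or individual rows ("row" mode), then use
        # a binomial test to check that the observed rate is consistent with p.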
        for _ in range(trials):
            out = layer(x)
            non_zero_count = out.sum(dim=(1, 2, 3)).nonzero().size(0)
            if mode == "batch":
                if non_zero_count == 0:
                    counts += 1
                num_samples += 1
            elif mode == "row":
                counts += batch_size - non_zero_count
                num_samples += batch_size

        p_value = stats.binom_test(counts, num_samples, p=p)
        assert p_value > 0.01

    @pytest.mark.parametrize("seed", range(10))
    @pytest.mark.parametrize("p", (0, 1))
    @pytest.mark.parametrize("mode", ["batch", "row"])
    def test_stochastic_depth(self, seed, mode, p):
        torch.manual_seed(seed)
        batch_size = 5
        x = torch.ones(size=(batch_size, 3, 4, 4))
        layer = ops.StochasticDepth(p=p, mode=mode)

        out = layer(x)
        if p == 0:
            assert out.equal(x)
        elif p == 1:
            assert out.equal(torch.zeros_like(x))


class TestUtils:
    @pytest.mark.parametrize("norm_layer", [None, nn.BatchNorm2d, nn.LayerNorm])
    def test_split_normalization_params(self, norm_layer):
        model = models.mobilenet_v3_large(norm_layer=norm_layer)
        params = ops._utils.split_normalization_params(model, None if norm_layer is None else [norm_layer])

        assert len(params[0]) == 92
        assert len(params[1]) == 82


if __name__ == "__main__":
    pytest.main([__file__])