import math
import os
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import Tuple

import numpy as np
import pytest
import torch
from common_utils import needs_cuda, cpu_and_gpu, assert_equal
from PIL import Image
from torch import nn, Tensor
from torch.autograd import gradcheck
from torch.nn.modules.utils import _pair
from torchvision import models, ops


class RoIOpTester(ABC):
    dtype = torch.float64

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs):
        x_dtype = self.dtype if x_dtype is None else x_dtype
        rois_dtype = self.dtype if rois_dtype is None else rois_dtype

        pool_size = 5
        # n_channels % (pool_size ** 2) == 0 is required for PS operations.
        n_channels = 2 * (pool_size ** 2)
        x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor(
            [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]],  # format is (xyxy)
            dtype=rois_dtype,
            device=device,
        )

        pool_h, pool_w = pool_size, pool_size
        y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs)
        # the following should be true whether we're running an autocast test or not.
        assert y.dtype == x.dtype
        gt_y = self.expected_fn(
            x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=device, dtype=self.dtype, **kwargs
        )

        tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
        torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)

    @pytest.mark.parametrize("seed", range(10))
    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_backward(self, seed, device, contiguous):
        torch.random.manual_seed(seed)
        pool_size = 2
        x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor(
            [[0, 0, 0, 4, 4], [0, 0, 2, 3, 4], [0, 2, 2, 4, 4]], dtype=self.dtype, device=device  # format is (xyxy)
        )

        def func(z):
            return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1)

        script_func = self.get_script_fn(rois, pool_size)

        gradcheck(func, (x,))
        gradcheck(script_func, (x,))

    @needs_cuda
    @pytest.mark.parametrize("x_dtype", (torch.float, torch.half))
    @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
    def test_autocast(self, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)

    def _helper_boxes_shape(self, func):
        # test boxes as Tensor[N, 5]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)
            func(a, boxes, output_size=(2, 2))

        # test boxes as List[Tensor[N, 4]]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype)
            func(a, [boxes], output_size=(2, 2))
    @abstractmethod
    def fn(*args, **kwargs):
        pass

    @abstractmethod
    def get_script_fn(*args, **kwargs):
        pass

    @abstractmethod
    def expected_fn(*args, **kwargs):
        pass

class TestRoiPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")

        n_channels = x.size(1)
        y = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1]

            roi_h, roi_w = roi_x.shape[-2:]
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_pool)

class TestPSRoIPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = x.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1]

            roi_height = max(i_end - i_begin, 1)
            roi_width = max(j_end - j_begin, 1)
            bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        area = bin_x.size(-2) * bin_x.size(-1)
                        for c_out in range(0, n_output_channels):
                            c_in = c_out * (pool_h * pool_w) + pool_w * i + j
                            t = torch.sum(bin_x[c_in, :, :])
                            y[roi_idx, c_out, i, j] = t / area
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_pool)


def bilinear_interpolate(data, y, x, snap_border=False):
    height, width = data.shape

    if snap_border:
        if -1 < y <= 0:
            y = 0
        elif height - 1 <= y < height:
            y = height - 1

        if -1 < x <= 0:
            x = 0
        elif width - 1 <= x < width:
            x = width - 1

    y_low = int(math.floor(y))
    x_low = int(math.floor(x))
    y_high = y_low + 1
    x_high = x_low + 1

    wy_h = y - y_low
    wx_h = x - x_low
    wy_l = 1 - wy_h
    wx_l = 1 - wx_h

    val = 0
    for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
        for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
            if 0 <= yp < height and 0 <= xp < width:
                val += wx * wy * data[yp, xp]
    return val
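
# (Added usage sketch, not part of the original test suite.) Sampling the exact center of a
# 2x2 grid weights all four neighbors equally, so the result is their mean:
#   data = np.array([[0.0, 1.0], [2.0, 3.0]])
#   bilinear_interpolate(data, 0.5, 0.5)  # -> 0.25 * (0 + 1 + 2 + 3) = 1.5
# Neighbors that fall outside the image contribute nothing, e.g. at y = -0.5, x = 0.0 only
# row 0 is in bounds and the result is 0.5 * data[0, 0].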


class TestRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs):
        return ops.RoIAlign(
            (pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio, aligned=aligned
        )(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self,
        in_data,
        rois,
        pool_h,
        pool_w,
        spatial_scale=1,
        sampling_ratio=-1,
        aligned=False,
        device=None,
        dtype=torch.float64,
    ):
        if device is None:
            device = torch.device("cpu")
        n_channels = in_data.size(1)
        out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        offset = 0.5 if aligned else 0.0

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - offset for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))

                    for channel in range(0, n_channels):

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, channel, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, channel, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_align)

    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_forward(self, device, contiguous, aligned, x_dtype=None, rois_dtype=None):
        super().test_forward(
            device=device, contiguous=contiguous, x_dtype=x_dtype, rois_dtype=rois_dtype, aligned=aligned
        )

    @needs_cuda
    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("x_dtype", (torch.float, torch.half))
    @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
    def test_autocast(self, aligned, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(
                torch.device("cuda"), contiguous=False, aligned=aligned, x_dtype=x_dtype, rois_dtype=rois_dtype
            )

    def _make_rois(self, img_size, num_imgs, dtype, num_rois=1000):
        rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype)
        rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,))  # set batch index
        rois[:, 3:] += rois[:, 1:3]  # make sure boxes aren't degenerate
        return rois
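
    # (Added note, not from the original file.) Each roi row is [batch_idx, x1, y1, x2, y2];
    # adding rois[:, 1:3] into rois[:, 3:] guarantees x2 >= x1 and y2 >= y1 (no degenerate
    # boxes) while keeping all coordinates within the img_size x img_size image.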

    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 10), (0.1, 50)))
    @pytest.mark.parametrize("qdtype", (torch.qint8, torch.quint8, torch.qint32))
    def test_qroialign(self, aligned, scale, zero_point, qdtype):
        """Make sure quantized version of RoIAlign is close to float version"""
        pool_size = 5
        img_size = 10
        n_channels = 2
        num_imgs = 1
        dtype = torch.float

        x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype)

        rois = self._make_rois(img_size, num_imgs, dtype)
        qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype)

        x, rois = qx.dequantize(), qrois.dequantize()  # we want to pass the same inputs

        y = ops.roi_align(
            x,
            rois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )
        qy = ops.roi_align(
            qx,
            qrois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )

        # The output qy is itself a quantized tensor and there might have been a loss of info when it was
        # quantized. For a fair comparison we need to quantize y as well
        quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype)

        try:
            # Ideally, we would assert this, which passes with (scale, zero) == (1, 0)
            assert (qy == quantized_float_y).all()
        except AssertionError:
            # But because the computations aren't exactly the same between the 2 RoIAlign procedures, some
            # rounding error may lead to a difference of 2 in the output.
            # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44
            # but 45.00000001 will be rounded to 46. We make sure below that:
            # - such discrepancies between qy and quantized_float_y are very rare (less than 5%)
            # - any difference between qy and quantized_float_y is == scale
            diff_idx = torch.where(qy != quantized_float_y)
            num_diff = diff_idx[0].numel()
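            # (Added illustration, not from the original file.) With scale=2 and zero_point=10,
            # quantize_per_tensor computes q = round(x / scale) + zero_point with round-half-to-even,
            # so 45.0 -> q = round(22.5) + 10 = 32, which dequantizes to (32 - 10) * 2 = 44,
            # while 45.00000001 -> q = 33, which dequantizes to 46: a difference of exactly `scale`.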
            assert num_diff / qy.numel() < 0.05

            abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize())
            t_scale = torch.full_like(abs_diff, fill_value=scale)
            torch.testing.assert_close(abs_diff, t_scale, rtol=1e-5, atol=1e-5)

    def test_qroi_align_multiple_images(self):
        dtype = torch.float
        x = torch.randint(50, 100, size=(2, 3, 10, 10)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=1, zero_point=0, dtype=torch.qint8)
        rois = self._make_rois(img_size=10, num_imgs=2, dtype=dtype, num_rois=10)
        qrois = torch.quantize_per_tensor(rois, scale=1, zero_point=0, dtype=torch.qint8)
        with pytest.raises(RuntimeError, match="Only one image per batch is allowed"):
            ops.roi_align(qx, qrois, output_size=5)


class TestPSRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, sampling_ratio=-1, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = in_data.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - 0.5 for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
                    for c_out in range(0, n_output_channels):
                        c_in = c_out * (pool_h * pool_w) + pool_w * i + j

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, c_in, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, c_out, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_align)


class TestMultiScaleRoIAlign:
    def test_msroialign_repr(self):
        fmap_names = ["0"]
        output_size = (7, 7)
        sampling_ratio = 2
        # Pass mock feature map names
        t = ops.poolers.MultiScaleRoIAlign(fmap_names, output_size, sampling_ratio)

        # Check integrity of object __repr__ attribute
        expected_string = (
            f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, "
            f"sampling_ratio={sampling_ratio})"
        )
        assert repr(t) == expected_string


class TestNMS:
    def _reference_nms(self, boxes, scores, iou_threshold):
        """
        Args:
            boxes (N, 4): boxes in corner-form.
            scores (N): probabilities.
            iou_threshold: intersection over union threshold.
        Returns:
            picked: a list of indexes of the kept boxes
        """
        picked = []
        _, indexes = scores.sort(descending=True)
        while len(indexes) > 0:
            current = indexes[0]
            picked.append(current.item())
            if len(indexes) == 1:
                break
            current_box = boxes[current, :]
            indexes = indexes[1:]
            rest_boxes = boxes[indexes, :]
            iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1)
            indexes = indexes[iou <= iou_threshold]

        return torch.as_tensor(picked)

    def _create_tensors_with_iou(self, N, iou_thresh):
        # force last box to have a pre-defined iou with the first box
        # let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1],
        # then, in order to satisfy ops.iou(b0, b1) == iou_thresh,
        # we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh
        # Adjust the threshold upward a bit with the intent of creating
        # at least one box that exceeds (barely) the threshold and so
        # should be suppressed.
        boxes = torch.rand(N, 4) * 100
        boxes[:, 2:] += boxes[:, :2]
        boxes[-1, :] = boxes[0, :]
        x0, y0, x1, y1 = boxes[-1].tolist()
        iou_thresh += 1e-5
        boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh
        scores = torch.rand(N)
        return boxes, scores
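
    # (Added derivation, not from the original file.) Since b1 = [x0, y0, x1 + d, y1] fully
    # contains b0 = [x0, y0, x1, y1], the intersection is the area of b0 and the union is the
    # area of b1, so
    #   iou = (x1 - x0) * (y1 - y0) / ((x1 - x0 + d) * (y1 - y0)) = (x1 - x0) / (x1 - x0 + d)
    # and solving iou == iou_thresh for d gives d = (x1 - x0) * (1 - iou_thresh) / iou_thresh,
    # the offset added to boxes[-1, 2] above.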

    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    @pytest.mark.parametrize("seed", range(10))
    def test_nms_ref(self, iou, seed):
        torch.random.manual_seed(seed)
        err_msg = "NMS incompatible between CPU and reference implementation for IoU={}"
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        keep_ref = self._reference_nms(boxes, scores, iou)
        keep = ops.nms(boxes, scores, iou)
        assert torch.allclose(keep, keep_ref), err_msg.format(iou)

    def test_nms_input_errors(self):
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(4), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 5), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(3, 2), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(4), 0.5)

    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 50), (3, 10)))
    def test_qnms(self, iou, scale, zero_point):
        # Note: we compare qnms vs nms instead of qnms vs reference implementation.
        # This is because with the int conversion, the trick used in _create_tensors_with_iou
        # doesn't really work (in fact, nms vs the reference implementation will also fail with ints)
        err_msg = "NMS and QNMS give different results for IoU={}"
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        scores *= 100  # otherwise most scores would be 0 or 1 after int conversion

        qboxes = torch.quantize_per_tensor(boxes, scale=scale, zero_point=zero_point, dtype=torch.quint8)
        qscores = torch.quantize_per_tensor(scores, scale=scale, zero_point=zero_point, dtype=torch.quint8)

        boxes = qboxes.dequantize()
        scores = qscores.dequantize()

        keep = ops.nms(boxes, scores, iou)
        qkeep = ops.nms(qboxes, qscores, iou)

        assert torch.allclose(qkeep, keep), err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    def test_nms_cuda(self, iou, dtype=torch.float64):
        tol = 1e-3 if dtype is torch.half else 1e-5
        err_msg = "NMS incompatible between CPU and CUDA for IoU={}"

        boxes, scores = self._create_tensors_with_iou(1000, iou)
        r_cpu = ops.nms(boxes, scores, iou)
        r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou)

        is_eq = torch.allclose(r_cpu, r_cuda.cpu())
        if not is_eq:
            # if the indices are not the same, ensure that it's because the scores
            # are duplicate
            is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()], rtol=tol, atol=tol)
        assert is_eq, err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    @pytest.mark.parametrize("dtype", (torch.float, torch.half))
    def test_autocast(self, iou, dtype):
        with torch.cuda.amp.autocast():
            self.test_nms_cuda(iou=iou, dtype=dtype)

    @needs_cuda
    def test_nms_cuda_float16(self):
        boxes = torch.tensor(
            [
                [285.3538, 185.5758, 1193.5110, 851.4551],
                [285.1472, 188.7374, 1192.4984, 851.0669],
                [279.2440, 197.9812, 1189.4746, 849.2019],
            ]
        ).cuda()
        scores = torch.tensor([0.6370, 0.7569, 0.3966]).cuda()

        iou_thres = 0.2
        keep32 = ops.nms(boxes, scores, iou_thres)
        keep16 = ops.nms(boxes.to(torch.float16), scores.to(torch.float16), iou_thres)
        assert_equal(keep32, keep16)

    @pytest.mark.parametrize("seed", range(10))
    def test_batched_nms_implementations(self, seed):
        """Make sure that both implementations of batched_nms yield identical results"""
        torch.random.manual_seed(seed)

        num_boxes = 1000
        iou_threshold = 0.9

        boxes = torch.cat((torch.rand(num_boxes, 2), torch.rand(num_boxes, 2) + 10), dim=1)
        assert max(boxes[:, 0]) < min(boxes[:, 2])  # x1 < x2
        assert max(boxes[:, 1]) < min(boxes[:, 3])  # y1 < y2

        scores = torch.rand(num_boxes)
        idxs = torch.randint(0, 4, size=(num_boxes,))
        keep_vanilla = ops.boxes._batched_nms_vanilla(boxes, scores, idxs, iou_threshold)
        keep_trick = ops.boxes._batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)

        torch.testing.assert_close(
            keep_vanilla, keep_trick, msg="The vanilla and the trick implementation yield different nms outputs."
        )

        # Also make sure an empty tensor is returned if boxes is empty
        empty = torch.empty((0,), dtype=torch.int64)
        torch.testing.assert_close(empty, ops.batched_nms(empty, None, None, None))


class TestDeformConv:
    dtype = torch.float64

    def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1):
        stride_h, stride_w = _pair(stride)
        pad_h, pad_w = _pair(padding)
        dil_h, dil_w = _pair(dilation)
        weight_h, weight_w = weight.shape[-2:]

        n_batches, n_in_channels, in_h, in_w = x.shape
        n_out_channels = weight.shape[0]

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1

        n_offset_grps = offset.shape[1] // (2 * weight_h * weight_w)
        in_c_per_offset_grp = n_in_channels // n_offset_grps

        n_weight_grps = n_in_channels // weight.shape[1]
        in_c_per_weight_grp = weight.shape[1]
        out_c_per_weight_grp = n_out_channels // n_weight_grps

        out = torch.zeros(n_batches, n_out_channels, out_h, out_w, device=x.device, dtype=x.dtype)
        for b in range(n_batches):
            for c_out in range(n_out_channels):
                for i in range(out_h):
                    for j in range(out_w):
                        for di in range(weight_h):
                            for dj in range(weight_w):
                                for c in range(in_c_per_weight_grp):
                                    weight_grp = c_out // out_c_per_weight_grp
                                    c_in = weight_grp * in_c_per_weight_grp + c

                                    offset_grp = c_in // in_c_per_offset_grp
                                    mask_idx = offset_grp * (weight_h * weight_w) + di * weight_w + dj
                                    offset_idx = 2 * mask_idx

                                    pi = stride_h * i - pad_h + dil_h * di + offset[b, offset_idx, i, j]
                                    pj = stride_w * j - pad_w + dil_w * dj + offset[b, offset_idx + 1, i, j]

                                    mask_value = 1.0
                                    if mask is not None:
                                        mask_value = mask[b, mask_idx, i, j]

                                    out[b, c_out, i, j] += (
                                        mask_value
                                        * weight[c_out, c, di, dj]
                                        * bilinear_interpolate(x[b, c_in, :, :], pi, pj)
                                    )
        out += bias.view(1, n_out_channels, 1, 1)
        return out

    @lru_cache(maxsize=None)
    def get_fn_args(self, device, contiguous, batch_sz, dtype):
        n_in_channels = 6
        n_out_channels = 2
        n_weight_grps = 2
        n_offset_grps = 3

        stride = (2, 1)
        pad = (1, 0)
        dilation = (2, 1)

        stride_h, stride_w = stride
        pad_h, pad_w = pad
        dil_h, dil_w = dilation
        weight_h, weight_w = (3, 2)
        in_h, in_w = (5, 4)

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1

        x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=dtype, requires_grad=True)

        offset = torch.randn(
            batch_sz,
            n_offset_grps * 2 * weight_h * weight_w,
            out_h,
            out_w,
            device=device,
            dtype=dtype,
            requires_grad=True,
        )

        mask = torch.randn(
            batch_sz, n_offset_grps * weight_h * weight_w, out_h, out_w, device=device, dtype=dtype, requires_grad=True
        )

        weight = torch.randn(
            n_out_channels,
            n_in_channels // n_weight_grps,
            weight_h,
            weight_w,
            device=device,
            dtype=dtype,
            requires_grad=True,
        )

        bias = torch.randn(n_out_channels, device=device, dtype=dtype, requires_grad=True)

        if not contiguous:
            x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)

        return x, weight, offset, mask, bias, stride, pad, dilation
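
    # (Added note, not from the original file.) The permute -> contiguous -> inverse-permute
    # round trips above keep the same logical values and shapes but leave each tensor with a
    # non-contiguous memory layout, which is what the contiguous=False test paths exercise.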

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    @pytest.mark.parametrize("batch_sz", (0, 33))
    def test_forward(self, device, contiguous, batch_sz, dtype=None):
        dtype = dtype or self.dtype
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype)
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        tol = 2e-3 if dtype is torch.half else 1e-5

        layer = ops.DeformConv2d(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups
        ).to(device=x.device, dtype=dtype)
        res = layer(x, offset, mask)

        weight = layer.weight.data
        bias = layer.bias.data
        expected = self.expected_fn(x, weight, offset, mask, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg=f"\nres:\n{res}\nexpected:\n{expected}"
        )

        # no modulation test
        res = layer(x, offset)
        expected = self.expected_fn(x, weight, offset, None, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg=f"\nres:\n{res}\nexpected:\n{expected}"
        )

    def test_wrong_sizes(self):
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(
            "cpu", contiguous=True, batch_sz=10, dtype=self.dtype
        )
        layer = ops.DeformConv2d(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups
        )
        with pytest.raises(RuntimeError, match="the shape of the offset"):
            wrong_offset = torch.rand_like(offset[:, :2])
            layer(x, wrong_offset)

        with pytest.raises(RuntimeError, match=r"mask.shape\[1\] is not valid"):
            wrong_mask = torch.rand_like(mask[:, :2])
            layer(x, offset, wrong_mask)

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    @pytest.mark.parametrize("batch_sz", (0, 33))
    def test_backward(self, device, contiguous, batch_sz):
        x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args(
            device, contiguous, batch_sz, self.dtype
        )

        def func(x_, offset_, mask_, weight_, bias_):
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=mask_
            )

        gradcheck(func, (x, offset, mask, weight, bias), nondet_tol=1e-5, fast_mode=True)

        def func_no_mask(x_, offset_, weight_, bias_):
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=None
            )

        gradcheck(func_no_mask, (x, offset, weight, bias), nondet_tol=1e-5, fast_mode=True)

        @torch.jit.script
        def script_func(x_, offset_, mask_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=mask_
            )

        gradcheck(
            lambda z, off, msk, wei, bi: script_func(z, off, msk, wei, bi, stride, padding, dilation),
            (x, offset, mask, weight, bias),
            nondet_tol=1e-5,
            fast_mode=True,
        )

        @torch.jit.script
        def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=None
            )

        gradcheck(
            lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation),
            (x, offset, weight, bias),
            nondet_tol=1e-5,
            fast_mode=True,
        )

    @needs_cuda
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_compare_cpu_cuda_grads(self, contiguous):
        # Test from https://github.com/pytorch/vision/issues/2598
        # Run on CUDA only

        # compare grads computed on CUDA with grads computed on CPU
        true_cpu_grads = None

        init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
        img = torch.randn(8, 9, 1000, 110)
        offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
        mask = torch.rand(8, 3 * 3, 1000, 110)

        if not contiguous:
            img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
        else:
            weight = init_weight

        for d in ["cpu", "cuda"]:
            out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d))
            out.mean().backward()
            if true_cpu_grads is None:
                true_cpu_grads = init_weight.grad
                assert true_cpu_grads is not None
            else:
                assert init_weight.grad is not None
                res_grads = init_weight.grad.to("cpu")
                torch.testing.assert_close(true_cpu_grads, res_grads)

    @needs_cuda
    @pytest.mark.parametrize("batch_sz", (0, 33))
    @pytest.mark.parametrize("dtype", (torch.float, torch.half))
    def test_autocast(self, batch_sz, dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)

    def test_forward_scriptability(self):
        # Non-regression test for https://github.com/pytorch/vision/issues/4078
        torch.jit.script(ops.DeformConv2d(in_channels=8, out_channels=8, kernel_size=3))


class TestFrozenBNT:
    def test_frozenbatchnorm2d_repr(self):
        num_features = 32
        eps = 1e-5
        t = ops.misc.FrozenBatchNorm2d(num_features, eps=eps)

        # Check integrity of object __repr__ attribute
        expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})"
        assert repr(t) == expected_string

    @pytest.mark.parametrize("seed", range(10))
    def test_frozenbatchnorm2d_eps(self, seed):
        torch.random.manual_seed(seed)
        sample_size = (4, 32, 28, 28)
        x = torch.rand(sample_size)
        state_dict = dict(
            weight=torch.rand(sample_size[1]),
            bias=torch.rand(sample_size[1]),
            running_mean=torch.rand(sample_size[1]),
            running_var=torch.rand(sample_size[1]),
            num_batches_tracked=torch.tensor(100),
        )

        # Check that default eps is equal to the one of BN
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1])
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1]).eval()
        bn.load_state_dict(state_dict)
        # Difference is expected to fall in an acceptable range
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)

        # Check computation for eps > 0
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1], eps=1e-5)
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1], eps=1e-5).eval()
        bn.load_state_dict(state_dict)
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)
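
    # (Added note, not from the original file.) FrozenBatchNorm2d applies the batch-norm
    # transform with statistics and affine parameters fixed as buffers:
    #   y = (x - running_mean) / sqrt(running_var + eps) * weight + bias
    # so with identical state it should match BatchNorm2d in eval mode up to floating-point
    # tolerance, which is what the assertions above check.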

    def test_frozenbatchnorm2d_n_arg(self):
        """Ensure a warning is thrown when passing `n` kwarg
        (remove this when support of `n` is dropped)"""
        with pytest.warns(DeprecationWarning):
            ops.misc.FrozenBatchNorm2d(32, eps=1e-5, n=32)


class TestBoxConversion:
    def _get_box_sequences():
        # Define here the argument type of `boxes` supported by region pooling operations
        box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
        box_list = [
            torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
            torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
        ]
        box_tuple = tuple(box_list)
        return box_tensor, box_list, box_tuple

    @pytest.mark.parametrize("box_sequence", _get_box_sequences())
    def test_check_roi_boxes_shape(self, box_sequence):
        # Ensure common sequences of tensors are supported
        ops._utils.check_roi_boxes_shape(box_sequence)

    @pytest.mark.parametrize("box_sequence", _get_box_sequences())
    def test_convert_boxes_to_roi_format(self, box_sequence):
        # Ensure common sequences of tensors yield the same roi-format result:
        # the Tensor[N, 5] reference produced by _get_box_sequences()
        ref_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
        if isinstance(box_sequence, Tensor):
            assert_equal(ref_tensor, box_sequence)
        else:
            assert_equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence))


class TestBox:
    def test_bbox_same(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )

        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        assert exp_xyxy.size() == torch.Size([4, 4])
        assert_equal(ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy)

    def test_bbox_xyxy_xywh(self):
        # Simple test convert boxes to xywh and back. Make sure they are same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        assert exp_xywh.size() == torch.Size([4, 4])
        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        assert_equal(box_xywh, exp_xywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)

    def test_bbox_xyxy_cxcywh(self):
        # Simple test convert boxes to cxcywh and back. Make sure they are same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )
        exp_cxcywh = torch.tensor(
            [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float
        )

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)
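
    # (Added worked example, not from the original file.) The xyxy -> cxcywh expectation above
    # follows from cx = (x1 + x2) / 2, cy = (y1 + y2) / 2, w = x2 - x1, h = y2 - y1, e.g.
    # [10, 15, 30, 35] -> [(10 + 30) / 2, (15 + 35) / 2, 30 - 10, 35 - 15] = [20, 25, 20, 20].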

    def test_bbox_xywh_cxcywh(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float
        )
        exp_cxcywh = torch.tensor(
            [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float
        )

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        assert_equal(box_xywh, box_tensor)

    @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh"])
    @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy"])
    def test_bbox_invalid(self, inv_infmt, inv_outfmt):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float
        )

        with pytest.raises(ValueError):
            ops.box_convert(box_tensor, inv_infmt, inv_outfmt)

    def test_bbox_convert_jit(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )

        scripted_fn = torch.jit.script(ops.box_convert)
        TOLERANCE = 1e-3

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        scripted_xywh = scripted_fn(box_tensor, "xyxy", "xywh")
        torch.testing.assert_close(scripted_xywh, box_xywh, rtol=0.0, atol=TOLERANCE)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        scripted_cxcywh = scripted_fn(box_tensor, "xyxy", "cxcywh")
        torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)


class TestBoxArea:
    def test_box_area(self):
        def area_check(box, expected, tolerance=1e-4):
            out = ops.box_area(box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int8, torch.int16, torch.int32, torch.int64]:
            box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=dtype)
            expected = torch.tensor([10000, 0])
            area_check(box_tensor, expected)

        # Check for float32 and float64 boxes
        for dtype in [torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([604723.0806, 600965.4666, 592761.0085], dtype=torch.float64)
            area_check(box_tensor, expected, tolerance=0.05)

        # Check for float16 box
        box_tensor = torch.tensor(
            [[285.25, 185.625, 1194.0, 851.5], [285.25, 188.75, 1192.0, 851.0], [279.25, 198.0, 1189.0, 849.0]],
            dtype=torch.float16,
        )
        expected = torch.tensor([605113.875, 600495.1875, 592247.25])
        area_check(box_tensor, expected)

    def test_box_area_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.box_area(box_tensor)
        scripted_fn = torch.jit.script(ops.box_area)
        scripted_area = scripted_fn(box_tensor)
        torch.testing.assert_close(scripted_area, expected, rtol=0.0, atol=TOLERANCE)


class TestBoxIou:
    def test_iou(self):
        def iou_check(box, expected, tolerance=1e-4):
            out = ops.box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]])
            iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-4)
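
    # (Added worked example, not from the original file.) The 0.25 off-diagonal entry follows
    # from iou = |A ∩ B| / |A ∪ B|: boxes [0, 0, 100, 100] and [0, 0, 50, 50] intersect in a
    # 50 x 50 region, so iou = 2500 / (10000 + 2500 - 2500) = 0.25.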

    def test_iou_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.box_iou(box_tensor, box_tensor)
        scripted_fn = torch.jit.script(ops.box_iou)
        scripted_iou = scripted_fn(box_tensor, box_tensor)
        torch.testing.assert_close(scripted_iou, expected, rtol=0.0, atol=TOLERANCE)


class TestGenBoxIou:
    def test_gen_iou(self):
        def gen_iou_check(box, expected, tolerance=1e-4):
            out = ops.generalized_box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0]])
            gen_iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            gen_iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-3)

    def test_giou_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.generalized_box_iou(box_tensor, box_tensor)
        scripted_fn = torch.jit.script(ops.generalized_box_iou)
        scripted_iou = scripted_fn(box_tensor, box_tensor)
        torch.testing.assert_close(scripted_iou, expected, rtol=0.0, atol=TOLERANCE)
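
    # (Added worked example, not from the original file.) Generalized IoU subtracts from IoU
    # the fraction of the smallest enclosing box C not covered by the union: for the disjoint
    # boxes [0, 0, 100, 100] and [200, 200, 300, 300], iou = 0, C = [0, 0, 300, 300] has area
    # 90000, and the union has area 20000, so giou = 0 - (90000 - 20000) / 90000 ≈ -0.7778.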


class TestMasksToBoxes:
    def test_masks_box(self):
        def masks_box_check(masks, expected, tolerance=1e-4):
            out = ops.masks_to_boxes(masks)
            assert out.dtype == torch.float
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int type boxes.
        def _get_image():
            assets_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
            mask_path = os.path.join(assets_directory, "masks.tiff")
            image = Image.open(mask_path)
            return image

        def _create_masks(image, masks):
            for index in range(image.n_frames):
                image.seek(index)
                frame = np.array(image)
                masks[index] = torch.tensor(frame)

            return masks

        expected = torch.tensor(
            [
                [127, 2, 165, 40],
                [2, 50, 44, 92],
                [56, 63, 98, 100],
                [139, 68, 175, 104],
                [160, 112, 198, 145],
                [49, 138, 99, 182],
                [108, 148, 152, 213],
            ],
            dtype=torch.float,
        )

        image = _get_image()
        for dtype in [torch.float16, torch.float32, torch.float64]:
            masks = torch.zeros((image.n_frames, image.height, image.width), dtype=dtype)
            masks = _create_masks(image, masks)
            masks_box_check(masks, expected)


class TestStochasticDepth:
    @pytest.mark.parametrize("seed", range(10))
    @pytest.mark.parametrize("p", [0.2, 0.5, 0.8])
    @pytest.mark.parametrize("mode", ["batch", "row"])
    def test_stochastic_depth_random(self, seed, mode, p):
        torch.manual_seed(seed)
        stats = pytest.importorskip("scipy.stats")
        batch_size = 5
        x = torch.ones(size=(batch_size, 3, 4, 4))
        layer = ops.StochasticDepth(p=p, mode=mode)
        layer.__repr__()

        trials = 250
        num_samples = 0
        counts = 0
        for _ in range(trials):
            out = layer(x)
            non_zero_count = out.sum(dim=(1, 2, 3)).nonzero().size(0)
            if mode == "batch":
                if non_zero_count == 0:
                    counts += 1
                num_samples += 1
            elif mode == "row":
                counts += batch_size - non_zero_count
                num_samples += batch_size

        p_value = stats.binom_test(counts, num_samples, p=p)
        assert p_value > 0.01
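
    # (Added note, not from the original file.) Each forward pass zeroes the whole batch
    # ("batch" mode) or individual samples ("row" mode) independently with probability p, so
    # the number of dropped samples follows Binomial(num_samples, p); the binomial test above
    # only fails when the observed drop rate is statistically incompatible with p.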

    @pytest.mark.parametrize("seed", range(10))
    @pytest.mark.parametrize("p", (0, 1))
    @pytest.mark.parametrize("mode", ["batch", "row"])
    def test_stochastic_depth(self, seed, mode, p):
        torch.manual_seed(seed)
        batch_size = 5
        x = torch.ones(size=(batch_size, 3, 4, 4))
        layer = ops.StochasticDepth(p=p, mode=mode)

        out = layer(x)
        if p == 0:
            assert out.equal(x)
        elif p == 1:
            assert out.equal(torch.zeros_like(x))


class TestUtils:
    @pytest.mark.parametrize("norm_layer", [None, nn.BatchNorm2d, nn.LayerNorm])
    def test_split_normalization_params(self, norm_layer):
        model = models.mobilenet_v3_large(norm_layer=norm_layer)
        params = ops._utils.split_normalization_params(model, None if norm_layer is None else [norm_layer])

        assert len(params[0]) == 92
        assert len(params[1]) == 82


if __name__ == "__main__":
    pytest.main([__file__])