import math
import os
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import Tuple

import numpy as np
import pytest
import torch
from common_utils import needs_cuda, cpu_and_gpu, assert_equal
from PIL import Image
from torch import Tensor
from torch.autograd import gradcheck
from torch.nn.modules.utils import _pair

from torchvision import ops


class RoIOpTester(ABC):
    dtype = torch.float64

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs):
        x_dtype = self.dtype if x_dtype is None else x_dtype
        rois_dtype = self.dtype if rois_dtype is None else rois_dtype

        pool_size = 5
        # n_channels % (pool_size ** 2) == 0 required for PS operations.
        n_channels = 2 * (pool_size ** 2)
        x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor(
            [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]],  # format is (xyxy)
            dtype=rois_dtype,
            device=device,
        )

        pool_h, pool_w = pool_size, pool_size
        y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs)
        # the following should be true whether we're running an autocast test or not.
        assert y.dtype == x.dtype
        gt_y = self.expected_fn(
            x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=device, dtype=self.dtype, **kwargs
        )

        tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
        torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_backward(self, device, contiguous):
        pool_size = 2
        x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor(
            [[0, 0, 0, 4, 4], [0, 0, 2, 3, 4], [0, 2, 2, 4, 4]], dtype=self.dtype, device=device  # format is (xyxy)
        )

        def func(z):
            return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1)

        script_func = self.get_script_fn(rois, pool_size)

        gradcheck(func, (x,))
        gradcheck(script_func, (x,))

    @needs_cuda
    @pytest.mark.parametrize("x_dtype", (torch.float, torch.half))
    @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
    def test_autocast(self, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)

    def _helper_boxes_shape(self, func):
        # test boxes as Tensor[N, 5]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)
            func(a, boxes, output_size=(2, 2))

        # test boxes as List[Tensor[N, 4]]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype)
            ops.roi_pool(a, [boxes], output_size=(2, 2))

    @abstractmethod
    def fn(*args, **kwargs):
        pass

    @abstractmethod
    def get_script_fn(*args, **kwargs):
        pass

    @abstractmethod
    def expected_fn(*args, **kwargs):
        pass


class TestRoiPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")

        n_channels = x.size(1)
        y = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
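            # e.g. get_slice(1, 1.8) == slice(floor(1.8), ceil(3.6)) == slice(1, 4);
            # neighbouring bins may share border pixels when block is fractional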
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1]

            roi_h, roi_w = roi_x.shape[-2:]
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_pool)


class TestPSRoIPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = x.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin : i_end + 1, j_begin : j_end + 1]

            roi_height = max(i_end - i_begin, 1)
            roi_width = max(j_end - j_begin, 1)
            bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        area = bin_x.size(-2) * bin_x.size(-1)
                        for c_out in range(0, n_output_channels):
                            c_in = c_out * (pool_h * pool_w) + pool_w * i + j
                            t = torch.sum(bin_x[c_in, :, :])
                            y[roi_idx, c_out, i, j] = t / area
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_pool)


def bilinear_interpolate(data, y, x, snap_border=False):
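    # Reference bilinear interpolation at a continuous (y, x) location; e.g. sampling
    # [[0.0, 1.0], [2.0, 3.0]] at (0.5, 0.5) weights each neighbour by 0.25 and gives 1.5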
    height, width = data.shape

    if snap_border:
        if -1 < y <= 0:
            y = 0
        elif height - 1 <= y < height:
            y = height - 1

        if -1 < x <= 0:
            x = 0
        elif width - 1 <= x < width:
            x = width - 1

    y_low = int(math.floor(y))
    x_low = int(math.floor(x))
    y_high = y_low + 1
    x_high = x_low + 1

    wy_h = y - y_low
    wx_h = x - x_low
    wy_l = 1 - wy_h
    wx_l = 1 - wx_h

    val = 0
    for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
        for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
            if 0 <= yp < height and 0 <= xp < width:
                val += wx * wy * data[yp, xp]
    return val


class TestRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs):
        return ops.RoIAlign(
            (pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio, aligned=aligned
        )(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self,
        in_data,
        rois,
        pool_h,
        pool_w,
        spatial_scale=1,
        sampling_ratio=-1,
        aligned=False,
        device=None,
        dtype=torch.float64,
    ):
        if device is None:
            device = torch.device("cpu")
        n_channels = in_data.size(1)
        out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        offset = 0.5 if aligned else 0.0

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - offset for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))

                    for channel in range(0, n_channels):

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, channel, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, channel, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_align)

    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_forward(self, device, contiguous, aligned, x_dtype=None, rois_dtype=None):
        super().test_forward(
            device=device, contiguous=contiguous, x_dtype=x_dtype, rois_dtype=rois_dtype, aligned=aligned
        )

    @needs_cuda
    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("x_dtype", (torch.float, torch.half))
    @pytest.mark.parametrize("rois_dtype", (torch.float, torch.half))
    def test_autocast(self, aligned, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(
                torch.device("cuda"), contiguous=False, aligned=aligned, x_dtype=x_dtype, rois_dtype=rois_dtype
            )

    def _make_rois(self, img_size, num_imgs, dtype, num_rois=1000):
        rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype)
        rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,))  # set batch index
        rois[:, 3:] += rois[:, 1:3]  # make sure boxes aren't degenerate
        return rois

    @pytest.mark.parametrize("aligned", (True, False))
    @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 10), (0.1, 50)))
    @pytest.mark.parametrize("qdtype", (torch.qint8, torch.quint8, torch.qint32))
    def test_qroialign(self, aligned, scale, zero_point, qdtype):
        """Make sure quantized version of RoIAlign is close to float version"""
        pool_size = 5
        img_size = 10
        n_channels = 2
        num_imgs = 1
        dtype = torch.float

        x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype)

        rois = self._make_rois(img_size, num_imgs, dtype)
        qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype)

        x, rois = qx.dequantize(), qrois.dequantize()  # we want to pass the same inputs

        y = ops.roi_align(
            x,
            rois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )
        qy = ops.roi_align(
            qx,
            qrois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )

        # The output qy is itself a quantized tensor and there might have been a loss of info when it was
        # quantized. For a fair comparison we need to quantize y as well
        quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype)

        try:
            # Ideally, we would assert this, which passes with (scale, zero) == (1, 0)
            assert (qy == quantized_float_y).all()
        except AssertionError:
            # But because the computations aren't exactly the same between the 2 RoIAlign procedures, some
            # rounding error may lead to a difference of 2 in the output.
            # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44
            # but 45.00000001 will be rounded to 46. We make sure below that:
            # - such discrepancies between qy and quantized_float_y are very rare (less than 5%)
            # - any difference between qy and quantized_float_y is == scale
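            # (Concretely: 45.0 / 2 + 10 = 32.5 rounds half-to-even to 32, which
            # dequantizes to (32 - 10) * 2 = 44, while 45.00000001 / 2 + 10 = 32.500000005
            # rounds up to 33, i.e. 46.)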
            diff_idx = torch.where(qy != quantized_float_y)
            num_diff = diff_idx[0].numel()
            assert num_diff / qy.numel() < 0.05

            abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize())
            t_scale = torch.full_like(abs_diff, fill_value=scale)
            torch.testing.assert_close(abs_diff, t_scale, rtol=1e-5, atol=1e-5)

    def test_qroi_align_multiple_images(self):
        dtype = torch.float
        x = torch.randint(50, 100, size=(2, 3, 10, 10)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=1, zero_point=0, dtype=torch.qint8)
        rois = self._make_rois(img_size=10, num_imgs=2, dtype=dtype, num_rois=10)
        qrois = torch.quantize_per_tensor(rois, scale=1, zero_point=0, dtype=torch.qint8)
        with pytest.raises(RuntimeError, match="Only one image per batch is allowed"):
            ops.roi_align(qx, qrois, output_size=5)


class TestPSRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, sampling_ratio=sampling_ratio)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(
        self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, sampling_ratio=-1, dtype=torch.float64
    ):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = in_data.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - 0.5 for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
                    for c_out in range(0, n_output_channels):
                        c_in = c_out * (pool_h * pool_w) + pool_w * i + j

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, c_in, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, c_out, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_align)


class TestMultiScaleRoIAlign:
    def test_msroialign_repr(self):
        fmap_names = ["0"]
        output_size = (7, 7)
        sampling_ratio = 2
        # Pass mock feature map names
        t = ops.poolers.MultiScaleRoIAlign(fmap_names, output_size, sampling_ratio)

        # Check integrity of object __repr__ attribute
        expected_string = (
            f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, "
            f"sampling_ratio={sampling_ratio})"
        )
        assert repr(t) == expected_string


class TestNMS:
    def _reference_nms(self, boxes, scores, iou_threshold):
        """
        Args:
            boxes (Tensor[N, 4]): boxes in corner-form (xyxy).
            scores (Tensor[N]): probabilities.
            iou_threshold: intersection over union threshold.
        Returns:
            picked: a list of indexes of the kept boxes
        """
        picked = []
        _, indexes = scores.sort(descending=True)
        while len(indexes) > 0:
            current = indexes[0]
            picked.append(current.item())
            if len(indexes) == 1:
                break
            current_box = boxes[current, :]
            indexes = indexes[1:]
            rest_boxes = boxes[indexes, :]
            iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1)
            indexes = indexes[iou <= iou_threshold]

        return torch.as_tensor(picked)

    def _create_tensors_with_iou(self, N, iou_thresh):
        # force last box to have a pre-defined iou with the first box
        # let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1],
        # then, in order to satisfy ops.iou(b0, b1) == iou_thresh,
        # we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh
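        # e.g. for b0 = [0, 0, 10, 10] and iou_thresh = 0.5: d = 10 * 0.5 / 0.5 = 10,
        # so b1 = [0, 0, 20, 10], intersection = 100, union = 200, IoU = 0.5 exactly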
        # Adjust the threshold upward a bit with the intent of creating
        # at least one box that exceeds (barely) the threshold and so
        # should be suppressed.
        boxes = torch.rand(N, 4) * 100
        boxes[:, 2:] += boxes[:, :2]
        boxes[-1, :] = boxes[0, :]
        x0, y0, x1, y1 = boxes[-1].tolist()
        iou_thresh += 1e-5
        boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh
        scores = torch.rand(N)
        return boxes, scores

    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    def test_nms_ref(self, iou):
        err_msg = "NMS incompatible between CPU and reference implementation for IoU={}"
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        keep_ref = self._reference_nms(boxes, scores, iou)
        keep = ops.nms(boxes, scores, iou)
        assert torch.allclose(keep, keep_ref), err_msg.format(iou)

    def test_nms_input_errors(self):
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(4), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 5), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(3, 2), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(4), 0.5)

    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 50), (3, 10)))
    def test_qnms(self, iou, scale, zero_point):
        # Note: we compare qnms vs nms instead of qnms vs reference implementation.
        # This is because with the int conversion, the trick used in _create_tensors_with_iou
        # doesn't really work (in fact, nms vs the reference implementation will also fail with ints)
        err_msg = "NMS and QNMS give different results for IoU={}"
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        scores *= 100  # otherwise most scores would be 0 or 1 after int conversion

        qboxes = torch.quantize_per_tensor(boxes, scale=scale, zero_point=zero_point, dtype=torch.quint8)
        qscores = torch.quantize_per_tensor(scores, scale=scale, zero_point=zero_point, dtype=torch.quint8)

        boxes = qboxes.dequantize()
        scores = qscores.dequantize()

        keep = ops.nms(boxes, scores, iou)
        qkeep = ops.nms(qboxes, qscores, iou)

        assert torch.allclose(qkeep, keep), err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    def test_nms_cuda(self, iou, dtype=torch.float64):
        tol = 1e-3 if dtype is torch.half else 1e-5
        err_msg = "NMS incompatible between CPU and CUDA for IoU={}"

        boxes, scores = self._create_tensors_with_iou(1000, iou)
        r_cpu = ops.nms(boxes, scores, iou)
        r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou)

        is_eq = torch.allclose(r_cpu, r_cuda.cpu())
        if not is_eq:
            # if the indices are not the same, ensure that it's because the scores
            # are duplicate
            is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()], rtol=tol, atol=tol)
        assert is_eq, err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
    @pytest.mark.parametrize("dtype", (torch.float, torch.half))
    def test_autocast(self, iou, dtype):
        with torch.cuda.amp.autocast():
            self.test_nms_cuda(iou=iou, dtype=dtype)

    @needs_cuda
    def test_nms_cuda_float16(self):
        boxes = torch.tensor(
            [
                [285.3538, 185.5758, 1193.5110, 851.4551],
                [285.1472, 188.7374, 1192.4984, 851.0669],
                [279.2440, 197.9812, 1189.4746, 849.2019],
            ]
        ).cuda()
        scores = torch.tensor([0.6370, 0.7569, 0.3966]).cuda()

        iou_thres = 0.2
        keep32 = ops.nms(boxes, scores, iou_thres)
        keep16 = ops.nms(boxes.to(torch.float16), scores.to(torch.float16), iou_thres)
        assert_equal(keep32, keep16)

    def test_batched_nms_implementations(self):
        """Make sure that both implementations of batched_nms yield identical results"""

        num_boxes = 1000
        iou_threshold = 0.9

        boxes = torch.cat((torch.rand(num_boxes, 2), torch.rand(num_boxes, 2) + 10), dim=1)
        assert max(boxes[:, 0]) < min(boxes[:, 2])  # x1 < x2
        assert max(boxes[:, 1]) < min(boxes[:, 3])  # y1 < y2

        scores = torch.rand(num_boxes)
        idxs = torch.randint(0, 4, size=(num_boxes,))
        keep_vanilla = ops.boxes._batched_nms_vanilla(boxes, scores, idxs, iou_threshold)
        keep_trick = ops.boxes._batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)
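        # The coordinate trick shifts each box by an offset proportional to its class
        # index so that boxes of different classes can never overlap, making one plain
        # nms call equivalent to running nms independently per class.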

        torch.testing.assert_close(
            keep_vanilla, keep_trick, msg="The vanilla and the trick implementation yield different nms outputs."
        )

        # Also make sure an empty tensor is returned if boxes is empty
        empty = torch.empty((0,), dtype=torch.int64)
        torch.testing.assert_close(empty, ops.batched_nms(empty, None, None, None))


class TestDeformConv:
    dtype = torch.float64

    def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1):
        stride_h, stride_w = _pair(stride)
        pad_h, pad_w = _pair(padding)
        dil_h, dil_w = _pair(dilation)
        weight_h, weight_w = weight.shape[-2:]

        n_batches, n_in_channels, in_h, in_w = x.shape
        n_out_channels = weight.shape[0]

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1

        n_offset_grps = offset.shape[1] // (2 * weight_h * weight_w)
        in_c_per_offset_grp = n_in_channels // n_offset_grps

        n_weight_grps = n_in_channels // weight.shape[1]
        in_c_per_weight_grp = weight.shape[1]
        out_c_per_weight_grp = n_out_channels // n_weight_grps

        out = torch.zeros(n_batches, n_out_channels, out_h, out_w, device=x.device, dtype=x.dtype)
        for b in range(n_batches):
            for c_out in range(n_out_channels):
                for i in range(out_h):
                    for j in range(out_w):
                        for di in range(weight_h):
                            for dj in range(weight_w):
                                for c in range(in_c_per_weight_grp):
                                    weight_grp = c_out // out_c_per_weight_grp
                                    c_in = weight_grp * in_c_per_weight_grp + c

                                    offset_grp = c_in // in_c_per_offset_grp
                                    mask_idx = offset_grp * (weight_h * weight_w) + di * weight_w + dj
                                    offset_idx = 2 * mask_idx
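                                    # offsets are stored as interleaved (dy, dx) pairs,
                                    # one pair per kernel tap and offset group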

                                    pi = stride_h * i - pad_h + dil_h * di + offset[b, offset_idx, i, j]
                                    pj = stride_w * j - pad_w + dil_w * dj + offset[b, offset_idx + 1, i, j]

                                    mask_value = 1.0
                                    if mask is not None:
                                        mask_value = mask[b, mask_idx, i, j]

                                    out[b, c_out, i, j] += (
                                        mask_value
                                        * weight[c_out, c, di, dj]
                                        * bilinear_interpolate(x[b, c_in, :, :], pi, pj)
                                    )
        out += bias.view(1, n_out_channels, 1, 1)
        return out

    @lru_cache(maxsize=None)
    def get_fn_args(self, device, contiguous, batch_sz, dtype):
        n_in_channels = 6
        n_out_channels = 2
        n_weight_grps = 2
        n_offset_grps = 3

        stride = (2, 1)
        pad = (1, 0)
        dilation = (2, 1)

        stride_h, stride_w = stride
        pad_h, pad_w = pad
        dil_h, dil_w = dilation
        weight_h, weight_w = (3, 2)
        in_h, in_w = (5, 4)

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1
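        # with the values above: out_h = (5 + 2 - (2 * 2 + 1)) // 2 + 1 = 2
        # and out_w = (4 + 0 - (1 * 1 + 1)) // 1 + 1 = 3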

        x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=dtype, requires_grad=True)

        offset = torch.randn(
            batch_sz,
            n_offset_grps * 2 * weight_h * weight_w,
            out_h,
            out_w,
            device=device,
            dtype=dtype,
            requires_grad=True,
        )

        mask = torch.randn(
            batch_sz, n_offset_grps * weight_h * weight_w, out_h, out_w, device=device, dtype=dtype, requires_grad=True
        )

        weight = torch.randn(
            n_out_channels,
            n_in_channels // n_weight_grps,
            weight_h,
            weight_w,
            device=device,
            dtype=dtype,
            requires_grad=True,
        )

        bias = torch.randn(n_out_channels, device=device, dtype=dtype, requires_grad=True)

        if not contiguous:
            x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)

        return x, weight, offset, mask, bias, stride, pad, dilation

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    @pytest.mark.parametrize("batch_sz", (0, 33))
    def test_forward(self, device, contiguous, batch_sz, dtype=None):
        dtype = dtype or self.dtype
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype)
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        tol = 2e-3 if dtype is torch.half else 1e-5

        layer = ops.DeformConv2d(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups
        ).to(device=x.device, dtype=dtype)
        res = layer(x, offset, mask)

        weight = layer.weight.data
        bias = layer.bias.data
        expected = self.expected_fn(x, weight, offset, mask, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg="\nres:\n{}\nexpected:\n{}".format(res, expected)
        )

        # no modulation test
        res = layer(x, offset)
        expected = self.expected_fn(x, weight, offset, None, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg="\nres:\n{}\nexpected:\n{}".format(res, expected)
        )

    def test_wrong_sizes(self):
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(
            "cpu", contiguous=True, batch_sz=10, dtype=self.dtype
        )
        layer = ops.DeformConv2d(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups
        )
        with pytest.raises(RuntimeError, match="the shape of the offset"):
            wrong_offset = torch.rand_like(offset[:, :2])
            layer(x, wrong_offset)

        with pytest.raises(RuntimeError, match=r"mask.shape\[1\] is not valid"):
            wrong_mask = torch.rand_like(mask[:, :2])
            layer(x, offset, wrong_mask)

    @pytest.mark.parametrize("device", cpu_and_gpu())
    @pytest.mark.parametrize("contiguous", (True, False))
    @pytest.mark.parametrize("batch_sz", (0, 33))
    def test_backward(self, device, contiguous, batch_sz):
        x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args(
            device, contiguous, batch_sz, self.dtype
        )

        def func(x_, offset_, mask_, weight_, bias_):
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=mask_
            )

        gradcheck(func, (x, offset, mask, weight, bias), nondet_tol=1e-5, fast_mode=True)

        def func_no_mask(x_, offset_, weight_, bias_):
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation, mask=None
            )

        gradcheck(func_no_mask, (x, offset, weight, bias), nondet_tol=1e-5, fast_mode=True)

        @torch.jit.script
        def script_func(x_, offset_, mask_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=mask_
            )

        gradcheck(
            lambda z, off, msk, wei, bi: script_func(z, off, msk, wei, bi, stride, padding, dilation),
            (x, offset, mask, weight, bias),
            nondet_tol=1e-5,
            fast_mode=True,
        )

        @torch.jit.script
        def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(
                x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_, mask=None
            )

        gradcheck(
            lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation),
            (x, offset, weight, bias),
            nondet_tol=1e-5,
            fast_mode=True,
        )

    @needs_cuda
    @pytest.mark.parametrize("contiguous", (True, False))
    def test_compare_cpu_cuda_grads(self, contiguous):
        # Test from https://github.com/pytorch/vision/issues/2598
        # Run on CUDA only

        # compare grads computed on CUDA with grads computed on CPU
        true_cpu_grads = None

        init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
        img = torch.randn(8, 9, 1000, 110)
        offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
        mask = torch.rand(8, 3 * 3, 1000, 110)

        if not contiguous:
            img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
        else:
            weight = init_weight

        for d in ["cpu", "cuda"]:

            out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d))
            out.mean().backward()
            if true_cpu_grads is None:
                true_cpu_grads = init_weight.grad
                assert true_cpu_grads is not None
            else:
                assert init_weight.grad is not None
                res_grads = init_weight.grad.to("cpu")
                torch.testing.assert_close(true_cpu_grads, res_grads)

    @needs_cuda
    @pytest.mark.parametrize("batch_sz", (0, 33))
    @pytest.mark.parametrize("dtype", (torch.float, torch.half))
    def test_autocast(self, batch_sz, dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)

    def test_forward_scriptability(self):
        # Non-regression test for https://github.com/pytorch/vision/issues/4078
        torch.jit.script(ops.DeformConv2d(in_channels=8, out_channels=8, kernel_size=3))


class TestFrozenBNT:
    def test_frozenbatchnorm2d_repr(self):
        num_features = 32
        eps = 1e-5
        t = ops.misc.FrozenBatchNorm2d(num_features, eps=eps)

        # Check integrity of object __repr__ attribute
        expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})"
        assert repr(t) == expected_string

    def test_frozenbatchnorm2d_eps(self):
        sample_size = (4, 32, 28, 28)
        x = torch.rand(sample_size)
        state_dict = dict(
            weight=torch.rand(sample_size[1]),
            bias=torch.rand(sample_size[1]),
            running_mean=torch.rand(sample_size[1]),
            running_var=torch.rand(sample_size[1]),
            num_batches_tracked=torch.tensor(100),
        )

        # Check that default eps is equal to the one of BN
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1])
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1]).eval()
        bn.load_state_dict(state_dict)
        # Difference is expected to fall in an acceptable range
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)

        # Check computation for eps > 0
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1], eps=1e-5)
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1], eps=1e-5).eval()
        bn.load_state_dict(state_dict)
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)

    def test_frozenbatchnorm2d_n_arg(self):
        """Ensure a warning is raised when passing the `n` kwarg
        (remove this when support of `n` is dropped)"""
        with pytest.warns(DeprecationWarning):
            ops.misc.FrozenBatchNorm2d(32, eps=1e-5, n=32)


class TestBoxConversion:
    def _get_box_sequences():
        # Define here the argument type of `boxes` supported by region pooling operations
        box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
        box_list = [
            torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
            torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
        ]
        box_tuple = tuple(box_list)
        return box_tensor, box_list, box_tuple

    @pytest.mark.parametrize("box_sequence", _get_box_sequences())
    def test_check_roi_boxes_shape(self, box_sequence):
        # Ensure common sequences of tensors are supported
        ops._utils.check_roi_boxes_shape(box_sequence)

    @pytest.mark.parametrize("box_sequence", _get_box_sequences())
    def test_convert_boxes_to_roi_format(self, box_sequence):
        # Ensure common sequences of tensors yield the same result
        ref_tensor = None
        if ref_tensor is None:
            ref_tensor = ops._utils.convert_boxes_to_roi_format(box_sequence)
        else:
            assert_equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence))


class TestBox:
    def test_bbox_same(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )

        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        assert exp_xyxy.size() == torch.Size([4, 4])
        assert_equal(ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy)

    def test_bbox_xyxy_xywh(self):
        # Simple test convert boxes to xywh and back. Make sure they are same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        assert exp_xywh.size() == torch.Size([4, 4])
        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        assert_equal(box_xywh, exp_xywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)

    def test_bbox_xyxy_cxcywh(self):
        # Simple test convert boxes to xywh and back. Make sure they are same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )
        exp_cxcywh = torch.tensor(
            [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float
        )

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)

    def test_bbox_xywh_cxcywh(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float
        )

        # Expected values in cxcywh format: cx = x + w / 2, cy = y + h / 2
        exp_cxcywh = torch.tensor(
            [[50, 50, 100, 100], [0, 0, 0, 0], [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float
        )

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        assert_equal(box_xywh, box_tensor)

    @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh"])
    @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy"])
    def test_bbox_invalid(self, inv_infmt, inv_outfmt):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float
        )

        with pytest.raises(ValueError):
            ops.box_convert(box_tensor, inv_infmt, inv_outfmt)

    def test_bbox_convert_jit(self):
        box_tensor = torch.tensor(
            [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float
        )

        scripted_fn = torch.jit.script(ops.box_convert)
        TOLERANCE = 1e-3

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        scripted_xywh = scripted_fn(box_tensor, "xyxy", "xywh")
        torch.testing.assert_close(scripted_xywh, box_xywh, rtol=0.0, atol=TOLERANCE)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        scripted_cxcywh = scripted_fn(box_tensor, "xyxy", "cxcywh")
        torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)


class TestBoxArea:
    def test_box_area(self):
        def area_check(box, expected, tolerance=1e-4):
            out = ops.box_area(box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int8, torch.int16, torch.int32, torch.int64]:
            box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=dtype)
            expected = torch.tensor([10000, 0])
            area_check(box_tensor, expected)

        # Check for float32 and float64 boxes
        for dtype in [torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([604723.0806, 600965.4666, 592761.0085], dtype=torch.float64)
            area_check(box_tensor, expected, tolerance=0.05)

        # Check for float16 box
        box_tensor = torch.tensor(
            [[285.25, 185.625, 1194.0, 851.5], [285.25, 188.75, 1192.0, 851.0], [279.25, 198.0, 1189.0, 849.0]],
            dtype=torch.float16,
        )
        expected = torch.tensor([605113.875, 600495.1875, 592247.25])
        area_check(box_tensor, expected)

    def test_box_area_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.box_area(box_tensor)
        scripted_fn = torch.jit.script(ops.box_area)
        scripted_area = scripted_fn(box_tensor)
        torch.testing.assert_close(scripted_area, expected, rtol=0.0, atol=TOLERANCE)


class TestBoxIou:
    def test_iou(self):
        def iou_check(box, expected, tolerance=1e-4):
            out = ops.box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]])
            iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-4)

    def test_iou_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.box_iou(box_tensor, box_tensor)
        scripted_fn = torch.jit.script(ops.box_iou)
        scripted_iou = scripted_fn(box_tensor, box_tensor)
        torch.testing.assert_close(scripted_iou, expected, rtol=0.0, atol=TOLERANCE)


class TestGenBoxIou:
    def test_gen_iou(self):
        def gen_iou_check(box, expected, tolerance=1e-4):
            out = ops.generalized_box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0]])
            gen_iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor(
                [
                    [285.3538, 185.5758, 1193.5110, 851.4551],
                    [285.1472, 188.7374, 1192.4984, 851.0669],
                    [279.2440, 197.9812, 1189.4746, 849.2019],
                ],
                dtype=dtype,
            )
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            gen_iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-3)

    def test_giou_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
        TOLERANCE = 1e-3
        expected = ops.generalized_box_iou(box_tensor, box_tensor)
        scripted_fn = torch.jit.script(ops.generalized_box_iou)
        scripted_iou = scripted_fn(box_tensor, box_tensor)
        torch.testing.assert_close(scripted_iou, expected, rtol=0.0, atol=TOLERANCE)


class TestMasksToBoxes:
    def test_masks_box(self):
        def masks_box_check(masks, expected, tolerance=1e-4):
            out = ops.masks_to_boxes(masks)
            assert out.dtype == torch.float
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int type boxes.
        def _get_image():
            assets_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
            mask_path = os.path.join(assets_directory, "masks.tiff")
            image = Image.open(mask_path)
            return image

        def _create_masks(image, masks):
            for index in range(image.n_frames):
                image.seek(index)
                frame = np.array(image)
                masks[index] = torch.tensor(frame)

            return masks

        expected = torch.tensor(
            [
                [127, 2, 165, 40],
                [2, 50, 44, 92],
                [56, 63, 98, 100],
                [139, 68, 175, 104],
                [160, 112, 198, 145],
                [49, 138, 99, 182],
                [108, 148, 152, 213],
            ],
            dtype=torch.float,
        )

        image = _get_image()
        for dtype in [torch.float16, torch.float32, torch.float64]:
            masks = torch.zeros((image.n_frames, image.height, image.width), dtype=dtype)
            masks = _create_masks(image, masks)
            masks_box_check(masks, expected)


class TestStochasticDepth:
    @pytest.mark.parametrize("p", [0.2, 0.5, 0.8])
    @pytest.mark.parametrize("mode", ["batch", "row"])
    def test_stochastic_depth(self, mode, p):
        stats = pytest.importorskip("scipy.stats")
        batch_size = 5
        x = torch.ones(size=(batch_size, 3, 4, 4))
        layer = ops.StochasticDepth(p=p, mode=mode).to(device=x.device, dtype=x.dtype)
        layer.__repr__()
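        # StochasticDepth drops with probability p: in "batch" mode the whole batch is
        # zeroed at once, in "row" mode each sample independently; the observed drop
        # rate is binomial-tested against p below.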

        trials = 250
        num_samples = 0
        counts = 0
        for _ in range(trials):
            out = layer(x)
            non_zero_count = out.sum(dim=(1, 2, 3)).nonzero().size(0)
            if mode == "batch":
                if non_zero_count == 0:
                    counts += 1
                num_samples += 1
            elif mode == "row":
                counts += batch_size - non_zero_count
                num_samples += batch_size

        p_value = stats.binom_test(counts, num_samples, p=p)
        assert p_value > 0.0001


if __name__ == "__main__":
    pytest.main([__file__])