from common_utils import needs_cuda, cpu_and_gpu, assert_equal
import math
from abc import ABC, abstractmethod
import pytest

import numpy as np

import torch
from functools import lru_cache
from torch import Tensor
from torch.autograd import gradcheck
from torch.nn.modules.utils import _pair
from torchvision import ops
from typing import Tuple


class RoIOpTester(ABC):
    dtype = torch.float64

    @pytest.mark.parametrize('device', cpu_and_gpu())
    @pytest.mark.parametrize('contiguous', (True, False))
    def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs):
        x_dtype = self.dtype if x_dtype is None else x_dtype
        rois_dtype = self.dtype if rois_dtype is None else rois_dtype
        pool_size = 5
        # n_channels % (pool_size ** 2) == 0 required for PS operations.
        n_channels = 2 * (pool_size ** 2)
        x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor([[0, 0, 0, 9, 9],  # format is (xyxy)
                             [0, 0, 5, 4, 9],
                             [0, 5, 5, 9, 9],
                             [1, 0, 0, 9, 9]],
                            dtype=rois_dtype, device=device)

        pool_h, pool_w = pool_size, pool_size
        y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs)
        # the following should be true whether we're running an autocast test or not.
        assert y.dtype == x.dtype
        gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1,
                                sampling_ratio=-1, device=device, dtype=self.dtype, **kwargs)

        tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
        torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)

    @pytest.mark.parametrize('device', cpu_and_gpu())
    @pytest.mark.parametrize('contiguous', (True, False))
    def test_backward(self, device, contiguous):
        pool_size = 2
        x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor([[0, 0, 0, 4, 4],  # format is (xyxy)
                             [0, 0, 2, 3, 4],
                             [0, 2, 2, 4, 4]],
                            dtype=self.dtype, device=device)

        def func(z):
            return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1)

        script_func = self.get_script_fn(rois, pool_size)

        gradcheck(func, (x,))
        gradcheck(script_func, (x,))

    @needs_cuda
    @pytest.mark.parametrize('x_dtype', (torch.float, torch.half))
    @pytest.mark.parametrize('rois_dtype', (torch.float, torch.half))
    def test_autocast(self, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)

    def _helper_boxes_shape(self, func):
        # test boxes as Tensor[N, 5]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)
            func(a, boxes, output_size=(2, 2))

        # test boxes as List[Tensor[N, 4]]
        with pytest.raises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype)
            func(a, [boxes], output_size=(2, 2))

    @abstractmethod
    def fn(*args, **kwargs):
        pass

    @abstractmethod
    def get_script_fn(*args, **kwargs):
        pass

    @abstractmethod
    def expected_fn(*args, **kwargs):
        pass


class TestRoiPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
                    device=None, dtype=torch.float64):
        if device is None:
            device = torch.device("cpu")

        n_channels = x.size(1)
        y = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)
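        # get_slice mirrors the pooling kernel's binning: bin k spans indices
        # [floor(k * block), ceil((k + 1) * block)), so neighbouring bins can share a
        # row/column when the RoI size is not an exact multiple of the pool size.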
        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]

            roi_h, roi_w = roi_x.shape[-2:]
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_pool)


class TestPSRoIPool(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_pool)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
                    device=None, dtype=torch.float64):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = x.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
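        # Position-sensitive pooling: each output channel c_out reads a dedicated input
        # channel per bin, c_in = c_out * (pool_h * pool_w) + pool_w * i + j, so every
        # bin (i, j) pools from its own slice of the input channels.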
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]

            roi_height = max(i_end - i_begin, 1)
            roi_width = max(j_end - j_begin, 1)
            bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        area = bin_x.size(-2) * bin_x.size(-1)
                        for c_out in range(0, n_output_channels):
                            c_in = c_out * (pool_h * pool_w) + pool_w * i + j
                            t = torch.sum(bin_x[c_in, :, :])
                            y[roi_idx, c_out, i, j] = t / area
        return y

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_pool)


def bilinear_interpolate(data, y, x, snap_border=False):
    height, width = data.shape

    if snap_border:
        if -1 < y <= 0:
            y = 0
        elif height - 1 <= y < height:
            y = height - 1

        if -1 < x <= 0:
            x = 0
        elif width - 1 <= x < width:
            x = width - 1

    y_low = int(math.floor(y))
    x_low = int(math.floor(x))
    y_high = y_low + 1
    x_high = x_low + 1

    wy_h = y - y_low
    wx_h = x - x_low
    wy_l = 1 - wy_h
    wx_l = 1 - wx_h
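    # Worked example: for data = [[0., 1.], [2., 3.]] and (y, x) = (0.5, 0.5), all four
    # corner weights are 0.25 and the interpolated value is (0 + 1 + 2 + 3) / 4 = 1.5.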

    val = 0
    for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
        for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
            if 0 <= yp < height and 0 <= xp < width:
                val += wx * wy * data[yp, xp]
    return val


class TestRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs):
        return ops.RoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
                            sampling_ratio=sampling_ratio, aligned=aligned)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False,
                    device=None, dtype=torch.float64):
        if device is None:
            device = torch.device("cpu")
        n_channels = in_data.size(1)
        out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        offset = 0.5 if aligned else 0.
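        # With aligned=True, the box coordinates are shifted by half a pixel so sampling
        # is aligned with pixel centers, removing the quantization bias of the legacy
        # (aligned=False) behaviour.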

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - offset for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w
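            # When sampling_ratio <= 0, the number of sampling points per bin is chosen
            # adaptively as ceil(bin_size) in each dimension, matching the kernels.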

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))

                    for channel in range(0, n_channels):

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, channel, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, channel, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.roi_align)

    @pytest.mark.parametrize('aligned', (True, False))
    @pytest.mark.parametrize('device', cpu_and_gpu())
    @pytest.mark.parametrize('contiguous', (True, False))
    def test_forward(self, device, contiguous, aligned, x_dtype=None, rois_dtype=None):
        super().test_forward(device=device, contiguous=contiguous, x_dtype=x_dtype, rois_dtype=rois_dtype,
                             aligned=aligned)

    @needs_cuda
    @pytest.mark.parametrize('aligned', (True, False))
    @pytest.mark.parametrize('x_dtype', (torch.float, torch.half))
    @pytest.mark.parametrize('rois_dtype', (torch.float, torch.half))
    def test_autocast(self, aligned, x_dtype, rois_dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, aligned=aligned, x_dtype=x_dtype,
                              rois_dtype=rois_dtype)

    def _make_rois(self, img_size, num_imgs, dtype, num_rois=1000):
        rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype)
        rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,))  # set batch index
        rois[:, 3:] += rois[:, 1:3]  # make sure boxes aren't degenerate
        return rois

    @pytest.mark.parametrize('aligned', (True, False))
    @pytest.mark.parametrize('scale, zero_point', ((1, 0), (2, 10), (0.1, 50)))
    @pytest.mark.parametrize('qdtype', (torch.qint8, torch.quint8, torch.qint32))
    def test_qroialign(self, aligned, scale, zero_point, qdtype):
        """Make sure quantized version of RoIAlign is close to float version"""
        pool_size = 5
        img_size = 10
        n_channels = 2
        num_imgs = 1
        dtype = torch.float

        x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype)

        rois = self._make_rois(img_size, num_imgs, dtype)
        qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype)

        x, rois = qx.dequantize(), qrois.dequantize()  # we want to pass the same inputs

        y = ops.roi_align(
            x,
            rois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )
        qy = ops.roi_align(
            qx,
            qrois,
            output_size=pool_size,
            spatial_scale=1,
            sampling_ratio=-1,
            aligned=aligned,
        )

        # The output qy is itself a quantized tensor and there might have been a loss of info when it was
        # quantized. For a fair comparison we need to quantize y as well
        quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype)

        try:
            # Ideally, we would assert this, which passes with (scale, zero) == (1, 0)
            assert (qy == quantized_float_y).all()
        except AssertionError:
            # But because the computations aren't exactly the same between the two RoIAlign
            # procedures, some rounding error may lead to a difference of 2 in the output.
            # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44
            # but 45.00000001 will be rounded to 46. We make sure below that:
            # - such discrepancies between qy and quantized_float_y are very rare (less than 5%)
            # - any difference between qy and quantized_float_y is == scale
            diff_idx = torch.where(qy != quantized_float_y)
            num_diff = diff_idx[0].numel()
            assert num_diff / qy.numel() < .05

            abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize())
            t_scale = torch.full_like(abs_diff, fill_value=scale)
            torch.testing.assert_close(abs_diff, t_scale, rtol=1e-5, atol=1e-5)

    def test_qroi_align_multiple_images(self):
        dtype = torch.float
        x = torch.randint(50, 100, size=(2, 3, 10, 10)).to(dtype)
        qx = torch.quantize_per_tensor(x, scale=1, zero_point=0, dtype=torch.qint8)
        rois = self._make_rois(img_size=10, num_imgs=2, dtype=dtype, num_rois=10)
        qrois = torch.quantize_per_tensor(rois, scale=1, zero_point=0, dtype=torch.qint8)
        with pytest.raises(RuntimeError, match="Only one image per batch is allowed"):
            ops.roi_align(qx, qrois, output_size=5)


class TestPSRoIAlign(RoIOpTester):
    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
                              sampling_ratio=sampling_ratio)(x, rois)

    def get_script_fn(self, rois, pool_size):
        scripted = torch.jit.script(ops.ps_roi_align)
        return lambda x: scripted(x, rois, pool_size)

    def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1,
                    sampling_ratio=-1, device=None, dtype=torch.float64):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = in_data.size(1)
        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - 0.5 for x in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
                    for c_out in range(0, n_output_channels):
                        c_in = c_out * (pool_h * pool_w) + pool_w * i + j

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(in_data[batch_idx, c_in, :, :], y, x, snap_border=True)
                        val /= grid_h * grid_w

                        out_data[r, c_out, i, j] = val
        return out_data

    def test_boxes_shape(self):
        self._helper_boxes_shape(ops.ps_roi_align)


class TestMultiScaleRoIAlign:
    def test_msroialign_repr(self):
        fmap_names = ['0']
        output_size = (7, 7)
        sampling_ratio = 2
        # Pass mock feature map names
        t = ops.poolers.MultiScaleRoIAlign(fmap_names, output_size, sampling_ratio)

        # Check integrity of object __repr__ attribute
        expected_string = (f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, "
                           f"sampling_ratio={sampling_ratio})")
        assert repr(t) == expected_string


class TestNMS:
    def _reference_nms(self, boxes, scores, iou_threshold):
        """
        Args:
            boxes (N, 4): boxes in corner-form.
            scores (N): scores for each box.
            iou_threshold: intersection over union threshold.
        Returns:
             picked: a list of indexes of the kept boxes
        """
        picked = []
        _, indexes = scores.sort(descending=True)
        while len(indexes) > 0:
            current = indexes[0]
            picked.append(current.item())
            if len(indexes) == 1:
                break
            current_box = boxes[current, :]
            indexes = indexes[1:]
            rest_boxes = boxes[indexes, :]
            iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1)
            indexes = indexes[iou <= iou_threshold]

        return torch.as_tensor(picked)

    def _create_tensors_with_iou(self, N, iou_thresh):
        # force last box to have a pre-defined iou with the first box
        # let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1],
        # then, in order to satisfy ops.iou(b0, b1) == iou_thresh,
        # we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh
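        # (intersection = area(b0), union = area(b0) + d * (y1 - y0), hence
        # iou = (x1 - x0) / ((x1 - x0) + d), which rearranges to the d above.)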
        # Adjust the threshold upward a bit with the intent of creating
        # at least one box that exceeds (barely) the threshold and so
        # should be suppressed.
        boxes = torch.rand(N, 4) * 100
        boxes[:, 2:] += boxes[:, :2]
        boxes[-1, :] = boxes[0, :]
        x0, y0, x1, y1 = boxes[-1].tolist()
        iou_thresh += 1e-5
        boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh
        scores = torch.rand(N)
        return boxes, scores

    @pytest.mark.parametrize("iou", (.2, .5, .8))
    def test_nms_ref(self, iou):
        err_msg = 'NMS incompatible between CPU and reference implementation for IoU={}'
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        keep_ref = self._reference_nms(boxes, scores, iou)
        keep = ops.nms(boxes, scores, iou)
        assert torch.allclose(keep, keep_ref), err_msg.format(iou)

    def test_nms_input_errors(self):
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(4), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 5), torch.rand(3), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(3, 2), 0.5)
        with pytest.raises(RuntimeError):
            ops.nms(torch.rand(3, 4), torch.rand(4), 0.5)

    @pytest.mark.parametrize("iou", (.2, .5, .8))
    @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 50), (3, 10)))
    def test_qnms(self, iou, scale, zero_point):
        # Note: we compare qnms vs nms instead of qnms vs reference implementation.
        # This is because with the int conversion, the trick used in _create_tensors_with_iou
        # doesn't really work (in fact, nms vs the reference implementation will also fail with ints)
        err_msg = 'NMS and QNMS give different results for IoU={}'
        boxes, scores = self._create_tensors_with_iou(1000, iou)
        scores *= 100  # otherwise most scores would be 0 or 1 after int conversion

        qboxes = torch.quantize_per_tensor(boxes, scale=scale, zero_point=zero_point, dtype=torch.quint8)
        qscores = torch.quantize_per_tensor(scores, scale=scale, zero_point=zero_point, dtype=torch.quint8)

        boxes = qboxes.dequantize()
        scores = qscores.dequantize()

        keep = ops.nms(boxes, scores, iou)
        qkeep = ops.nms(qboxes, qscores, iou)

        assert torch.allclose(qkeep, keep), err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (.2, .5, .8))
    def test_nms_cuda(self, iou, dtype=torch.float64):
        tol = 1e-3 if dtype is torch.half else 1e-5
        err_msg = 'NMS incompatible between CPU and CUDA for IoU={}'

        boxes, scores = self._create_tensors_with_iou(1000, iou)
        r_cpu = ops.nms(boxes, scores, iou)
        r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou)

        is_eq = torch.allclose(r_cpu, r_cuda.cpu())
        if not is_eq:
            # if the indices are not the same, ensure that it's because the scores
            # are duplicate
            is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()], rtol=tol, atol=tol)
        assert is_eq, err_msg.format(iou)

    @needs_cuda
    @pytest.mark.parametrize("iou", (.2, .5, .8))
    @pytest.mark.parametrize("dtype", (torch.float, torch.half))
    def test_autocast(self, iou, dtype):
        with torch.cuda.amp.autocast():
            self.test_nms_cuda(iou=iou, dtype=dtype)

    @needs_cuda
    def test_nms_cuda_float16(self):
        boxes = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551],
                              [285.1472, 188.7374, 1192.4984, 851.0669],
                              [279.2440, 197.9812, 1189.4746, 849.2019]]).cuda()
        scores = torch.tensor([0.6370, 0.7569, 0.3966]).cuda()

        iou_thres = 0.2
        keep32 = ops.nms(boxes, scores, iou_thres)
        keep16 = ops.nms(boxes.to(torch.float16), scores.to(torch.float16), iou_thres)
        assert_equal(keep32, keep16)

    def test_batched_nms_implementations(self):
        """Make sure that both implementations of batched_nms yield identical results"""

        num_boxes = 1000
        iou_threshold = .9

        boxes = torch.cat((torch.rand(num_boxes, 2), torch.rand(num_boxes, 2) + 10), dim=1)
        assert max(boxes[:, 0]) < min(boxes[:, 2])  # x1 < x2
        assert max(boxes[:, 1]) < min(boxes[:, 3])  # y1 < y2

        scores = torch.rand(num_boxes)
        idxs = torch.randint(0, 4, size=(num_boxes,))
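        # The coordinate trick offsets every box by idxs * (max coordinate + 1) so boxes
        # of different classes can never overlap and a single nms call suffices; the
        # vanilla implementation loops over the classes instead.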
        keep_vanilla = ops.boxes._batched_nms_vanilla(boxes, scores, idxs, iou_threshold)
        keep_trick = ops.boxes._batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)

        torch.testing.assert_close(
            keep_vanilla, keep_trick, msg="The vanilla and the trick implementation yield different nms outputs."
        )

        # Also make sure an empty tensor is returned if boxes is empty
        empty = torch.empty((0,), dtype=torch.int64)
        torch.testing.assert_close(empty, ops.batched_nms(empty, None, None, None))


class TestDeformConv:
    dtype = torch.float64

    def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1):
        stride_h, stride_w = _pair(stride)
        pad_h, pad_w = _pair(padding)
        dil_h, dil_w = _pair(dilation)
        weight_h, weight_w = weight.shape[-2:]

        n_batches, n_in_channels, in_h, in_w = x.shape
        n_out_channels = weight.shape[0]

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1
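        # out_h/out_w follow the standard convolution output-size formula. The offset
        # tensor holds 2 * n_offset_grps * weight_h * weight_w channels ((y, x) pairs per
        # kernel location); the mask, when given, holds half as many.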

        n_offset_grps = offset.shape[1] // (2 * weight_h * weight_w)
        in_c_per_offset_grp = n_in_channels // n_offset_grps

        n_weight_grps = n_in_channels // weight.shape[1]
        in_c_per_weight_grp = weight.shape[1]
        out_c_per_weight_grp = n_out_channels // n_weight_grps

        out = torch.zeros(n_batches, n_out_channels, out_h, out_w, device=x.device, dtype=x.dtype)
        for b in range(n_batches):
            for c_out in range(n_out_channels):
                for i in range(out_h):
                    for j in range(out_w):
                        for di in range(weight_h):
                            for dj in range(weight_w):
                                for c in range(in_c_per_weight_grp):
                                    weight_grp = c_out // out_c_per_weight_grp
                                    c_in = weight_grp * in_c_per_weight_grp + c

                                    offset_grp = c_in // in_c_per_offset_grp
                                    mask_idx = offset_grp * (weight_h * weight_w) + di * weight_w + dj
                                    offset_idx = 2 * mask_idx

                                    pi = stride_h * i - pad_h + dil_h * di + offset[b, offset_idx, i, j]
                                    pj = stride_w * j - pad_w + dil_w * dj + offset[b, offset_idx + 1, i, j]

                                    mask_value = 1.0
                                    if mask is not None:
                                        mask_value = mask[b, mask_idx, i, j]

                                    out[b, c_out, i, j] += (mask_value * weight[c_out, c, di, dj] *
                                                            bilinear_interpolate(x[b, c_in, :, :], pi, pj))
        out += bias.view(1, n_out_channels, 1, 1)
        return out

    @lru_cache(maxsize=None)
    def get_fn_args(self, device, contiguous, batch_sz, dtype):
        n_in_channels = 6
        n_out_channels = 2
        n_weight_grps = 2
        n_offset_grps = 3

        stride = (2, 1)
        pad = (1, 0)
        dilation = (2, 1)

        stride_h, stride_w = stride
        pad_h, pad_w = pad
        dil_h, dil_w = dilation
        weight_h, weight_w = (3, 2)
        in_h, in_w = (5, 4)

        out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
        out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1

        x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=dtype, requires_grad=True)

        offset = torch.randn(batch_sz, n_offset_grps * 2 * weight_h * weight_w, out_h, out_w,
                             device=device, dtype=dtype, requires_grad=True)

        mask = torch.randn(batch_sz, n_offset_grps * weight_h * weight_w, out_h, out_w,
                           device=device, dtype=dtype, requires_grad=True)

        weight = torch.randn(n_out_channels, n_in_channels // n_weight_grps, weight_h, weight_w,
                             device=device, dtype=dtype, requires_grad=True)

        bias = torch.randn(n_out_channels, device=device, dtype=dtype, requires_grad=True)

        if not contiguous:
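            # permute + contiguous + permute back preserves values and shape while
            # producing tensors whose underlying memory is non-contiguous.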
            x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)

        return x, weight, offset, mask, bias, stride, pad, dilation

    @pytest.mark.parametrize('device', cpu_and_gpu())
    @pytest.mark.parametrize('contiguous', (True, False))
    @pytest.mark.parametrize('batch_sz', (0, 33))
    def test_forward(self, device, contiguous, batch_sz, dtype=None):
        dtype = dtype or self.dtype
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype)
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        tol = 2e-3 if dtype is torch.half else 1e-5

        layer = ops.DeformConv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
                                 dilation=dilation, groups=groups).to(device=x.device, dtype=dtype)
        res = layer(x, offset, mask)

        weight = layer.weight.data
        bias = layer.bias.data
        expected = self.expected_fn(x, weight, offset, mask, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg='\nres:\n{}\nexpected:\n{}'.format(res, expected)
        )

        # no modulation test
        res = layer(x, offset)
        expected = self.expected_fn(x, weight, offset, None, bias, stride=stride, padding=padding, dilation=dilation)

        torch.testing.assert_close(
            res.to(expected), expected, rtol=tol, atol=tol, msg='\nres:\n{}\nexpected:\n{}'.format(res, expected)
        )

    def test_wrong_sizes(self):
        in_channels = 6
        out_channels = 2
        kernel_size = (3, 2)
        groups = 2
        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args('cpu', contiguous=True,
                                                                            batch_sz=10, dtype=self.dtype)
        layer = ops.DeformConv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
                                 dilation=dilation, groups=groups)
        with pytest.raises(RuntimeError, match="the shape of the offset"):
            wrong_offset = torch.rand_like(offset[:, :2])
            layer(x, wrong_offset)

        with pytest.raises(RuntimeError, match=r'mask.shape\[1\] is not valid'):
            wrong_mask = torch.rand_like(mask[:, :2])
            layer(x, offset, wrong_mask)

    @pytest.mark.parametrize('device', cpu_and_gpu())
    @pytest.mark.parametrize('contiguous', (True, False))
    @pytest.mark.parametrize('batch_sz', (0, 33))
    def test_backward(self, device, contiguous, batch_sz):
        x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args(device, contiguous,
                                                                                    batch_sz, self.dtype)

        def func(x_, offset_, mask_, weight_, bias_):
            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride,
                                     padding=padding, dilation=dilation, mask=mask_)

        gradcheck(func, (x, offset, mask, weight, bias), nondet_tol=1e-5, fast_mode=True)

        def func_no_mask(x_, offset_, weight_, bias_):
            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride,
                                     padding=padding, dilation=dilation, mask=None)

        gradcheck(func_no_mask, (x, offset, weight, bias), nondet_tol=1e-5, fast_mode=True)

        @torch.jit.script
        def script_func(x_, offset_, mask_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride_,
                                     padding=pad_, dilation=dilation_, mask=mask_)

        gradcheck(lambda z, off, msk, wei, bi: script_func(z, off, msk, wei, bi, stride, padding, dilation),
                  (x, offset, mask, weight, bias), nondet_tol=1e-5, fast_mode=True)

        @torch.jit.script
        def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
            # type:(Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor
            return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride_,
                                     padding=pad_, dilation=dilation_, mask=None)

        gradcheck(lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation),
                  (x, offset, weight, bias), nondet_tol=1e-5, fast_mode=True)

    @needs_cuda
    @pytest.mark.parametrize('contiguous', (True, False))
    def test_compare_cpu_cuda_grads(self, contiguous):
        # Test from https://github.com/pytorch/vision/issues/2598
        # Run on CUDA only

        # compare grads computed on CUDA with grads computed on CPU
        true_cpu_grads = None

        init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
        img = torch.randn(8, 9, 1000, 110)
        offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
        mask = torch.rand(8, 3 * 3, 1000, 110)

        if not contiguous:
            img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
            weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
        else:
            weight = init_weight

        for d in ["cpu", "cuda"]:

            out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d))
            out.mean().backward()
            if true_cpu_grads is None:
                true_cpu_grads = init_weight.grad
                assert true_cpu_grads is not None
            else:
                assert init_weight.grad is not None
                res_grads = init_weight.grad.to("cpu")
                torch.testing.assert_close(true_cpu_grads, res_grads)

    @needs_cuda
    @pytest.mark.parametrize('batch_sz', (0, 33))
    @pytest.mark.parametrize('dtype', (torch.float, torch.half))
    def test_autocast(self, batch_sz, dtype):
        with torch.cuda.amp.autocast():
            self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)

    def test_forward_scriptability(self):
        # Non-regression test for https://github.com/pytorch/vision/issues/4078
        torch.jit.script(ops.DeformConv2d(in_channels=8, out_channels=8, kernel_size=3))


class TestFrozenBNT:
    def test_frozenbatchnorm2d_repr(self):
        num_features = 32
        eps = 1e-5
        t = ops.misc.FrozenBatchNorm2d(num_features, eps=eps)

        # Check integrity of object __repr__ attribute
        expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})"
        assert repr(t) == expected_string

    def test_frozenbatchnorm2d_eps(self):
        sample_size = (4, 32, 28, 28)
        x = torch.rand(sample_size)
        state_dict = dict(weight=torch.rand(sample_size[1]),
                          bias=torch.rand(sample_size[1]),
                          running_mean=torch.rand(sample_size[1]),
                          running_var=torch.rand(sample_size[1]),
                          num_batches_tracked=torch.tensor(100))

        # Check that default eps is equal to the one of BN
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1])
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1]).eval()
        bn.load_state_dict(state_dict)
        # Difference is expected to fall in an acceptable range
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)

        # Check computation for eps > 0
        fbn = ops.misc.FrozenBatchNorm2d(sample_size[1], eps=1e-5)
        fbn.load_state_dict(state_dict, strict=False)
        bn = torch.nn.BatchNorm2d(sample_size[1], eps=1e-5).eval()
        bn.load_state_dict(state_dict)
        torch.testing.assert_close(fbn(x), bn(x), rtol=1e-5, atol=1e-6)

    def test_frozenbatchnorm2d_n_arg(self):
        """Ensure a warning is thrown when passing `n` kwarg
        (remove this when support of `n` is dropped)"""
        with pytest.warns(DeprecationWarning):
            ops.misc.FrozenBatchNorm2d(32, eps=1e-5, n=32)


class TestBoxConversion:
    def _get_box_sequences():
        # Define here the argument type of `boxes` supported by region pooling operations
        box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
        box_list = [torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
                    torch.tensor([[0, 0, 100, 100]], dtype=torch.float)]
        box_tuple = tuple(box_list)
        return box_tensor, box_list, box_tuple

    @pytest.mark.parametrize('box_sequence', _get_box_sequences())
    def test_check_roi_boxes_shape(self, box_sequence):
        # Ensure common sequences of tensors are supported
        ops._utils.check_roi_boxes_shape(box_sequence)

    @pytest.mark.parametrize('box_sequence', _get_box_sequences())
    def test_convert_boxes_to_roi_format(self, box_sequence):
        # Ensure common sequences of tensors yield the same roi format
        expected = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
        if torch.is_tensor(box_sequence):
            # A Tensor[N, 5] is already in roi format
            assert_equal(box_sequence, expected)
        else:
            assert_equal(ops._utils.convert_boxes_to_roi_format(box_sequence), expected)


class TestBox:
    def test_bbox_same(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        assert exp_xyxy.size() == torch.Size([4, 4])
        assert_equal(ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy)

    def test_bbox_xyxy_xywh(self):
        # Simple test converting boxes to xywh and back. Make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        assert exp_xywh.size() == torch.Size([4, 4])
        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        assert_equal(box_xywh, exp_xywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)

    def test_bbox_xyxy_cxcywh(self):
        # Simple test converting boxes to cxcywh and back. Make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                  [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)

    def test_bbox_xywh_cxcywh(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                  [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        assert_equal(box_xywh, box_tensor)

    @pytest.mark.parametrize('inv_infmt', ["xwyh", "cxwyh"])
    @pytest.mark.parametrize('inv_outfmt', ["xwcx", "xhwcy"])
    def test_bbox_invalid(self, inv_infmt, inv_outfmt):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        with pytest.raises(ValueError):
            ops.box_convert(box_tensor, inv_infmt, inv_outfmt)

    def test_bbox_convert_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        scripted_fn = torch.jit.script(ops.box_convert)
        TOLERANCE = 1e-3

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
        torch.testing.assert_close(scripted_xywh, box_xywh, rtol=0.0, atol=TOLERANCE)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
        torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)


class TestBoxArea:
    def test_box_area(self):
        def area_check(box, expected, tolerance=1e-4):
            out = ops.box_area(box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int8, torch.int16, torch.int32, torch.int64]:
            box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=dtype)
            expected = torch.tensor([10000, 0])
            area_check(box_tensor, expected)

        # Check for float32 and float64 boxes
        for dtype in [torch.float32, torch.float64]:
            box_tensor = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551],
                                       [285.1472, 188.7374, 1192.4984, 851.0669],
                                       [279.2440, 197.9812, 1189.4746, 849.2019]], dtype=dtype)
            expected = torch.tensor([604723.0806, 600965.4666, 592761.0085], dtype=torch.float64)
            area_check(box_tensor, expected, tolerance=0.05)

        # Check for float16 box
        box_tensor = torch.tensor([[285.25, 185.625, 1194.0, 851.5],
                                   [285.25, 188.75, 1192.0, 851.0],
                                   [279.25, 198.0, 1189.0, 849.0]], dtype=torch.float16)
        expected = torch.tensor([605113.875, 600495.1875, 592247.25])
        area_check(box_tensor, expected)


class TestBoxIou:
    def test_iou(self):
        def iou_check(box, expected, tolerance=1e-4):
            out = ops.box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]])
            iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551],
                                       [285.1472, 188.7374, 1192.4984, 851.0669],
                                       [279.2440, 197.9812, 1189.4746, 849.2019]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-4)


class TestGenBoxIou:
    def test_gen_iou(self):
        def gen_iou_check(box, expected, tolerance=1e-4):
            out = ops.generalized_box_iou(box, box)
            torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance)
1000

        # Check for int boxes
        for dtype in [torch.int16, torch.int32, torch.int64]:
            box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0]])
            gen_iou_check(box, expected)

        # Check for float boxes
        for dtype in [torch.float16, torch.float32, torch.float64]:
            box_tensor = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551],
                                       [285.1472, 188.7374, 1192.4984, 851.0669],
                                       [279.2440, 197.9812, 1189.4746, 849.2019]], dtype=dtype)
            expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]])
            gen_iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-3)


class TestStochasticDepth:
    @pytest.mark.parametrize('p', [0.2, 0.5, 0.8])
    @pytest.mark.parametrize('mode', ["batch", "row"])
    def test_stochastic_depth(self, mode, p):
        stats = pytest.importorskip("scipy.stats")
        batch_size = 5
        x = torch.ones(size=(batch_size, 3, 4, 4))
        layer = ops.StochasticDepth(p=p, mode=mode).to(device=x.device, dtype=x.dtype)
        layer.__repr__()
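        # mode="batch" zeroes the whole input with probability p; mode="row" zeroes each
        # sample independently with probability p. The loop below estimates the empirical
        # drop rate and the binomial test checks that it is consistent with p.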

        trials = 250
        num_samples = 0
        counts = 0
        for _ in range(trials):
            out = layer(x)
            non_zero_count = out.sum(dim=(1, 2, 3)).nonzero().size(0)
            if mode == "batch":
                if non_zero_count == 0:
                    counts += 1
                num_samples += 1
            elif mode == "row":
                counts += batch_size - non_zero_count
                num_samples += batch_size

        p_value = stats.binom_test(counts, num_samples, p=p)
        assert p_value > 0.0001


if __name__ == '__main__':
    pytest.main([__file__])