Unverified Commit a0cd96ff authored by Nicolas Hug, committed by GitHub

Port test_ops.py to pytest (#3953)

parent 2a52c2dc
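The diff below swaps unittest's per-configuration test methods for pytest parametrization. A minimal sketch of the before/after pattern (illustrative only; `test_op` and its body are made up, not part of this commit):

import pytest
import torch

# Before the port: one method per (device, contiguity) pair, each CUDA variant
# guarded by @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable").
# After the port: one test, expanded by pytest into a case per parameter tuple.
@pytest.mark.parametrize('device', ('cpu', 'cuda'))
@pytest.mark.parametrize('contiguous', (True, False))
def test_op(device, contiguous):
    if device == 'cuda' and not torch.cuda.is_available():
        pytest.skip("CUDA unavailable")
    x = torch.rand(2, 3, device=device)
    if not contiguous:
        x = x.T  # non-contiguous view of the same storage
    assert x.is_contiguous() == contiguous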
-from common_utils import needs_cuda, cpu_only
+from common_utils import needs_cuda, cpu_only, cpu_and_gpu
 from _assert_utils import assert_equal
 import math
-import unittest
+from abc import ABC, abstractmethod
 import pytest
 import numpy as np
@@ -15,48 +15,12 @@ from torchvision import ops
 from typing import Tuple
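`cpu_and_gpu()` comes from the suite's common_utils, and its body is outside this diff. Presumably it yields the device list for `@pytest.mark.parametrize`, with the GPU entry carrying a skip mark so the case is skipped when CUDA is absent. A hypothetical reimplementation, for orientation only:

import pytest
import torch

def cpu_and_gpu():
    # Hypothetical stand-in for common_utils.cpu_and_gpu(); the real helper in
    # torchvision's test suite may differ. pytest.param attaches the skip mark
    # to the 'cuda' value only, so CPU cases always run.
    return (
        pytest.param('cpu'),
        pytest.param('cuda', marks=pytest.mark.skipif(not torch.cuda.is_available(),
                                                      reason='CUDA unavailable')),
    )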
-class OpTester(object):
-    @classmethod
-    def setUpClass(cls):
-        cls.dtype = torch.float64
-
-    def test_forward_cpu_contiguous(self):
-        self._test_forward(device=torch.device('cpu'), contiguous=True)
-
-    def test_forward_cpu_non_contiguous(self):
-        self._test_forward(device=torch.device('cpu'), contiguous=False)
-
-    def test_backward_cpu_contiguous(self):
-        self._test_backward(device=torch.device('cpu'), contiguous=True)
-
-    def test_backward_cpu_non_contiguous(self):
-        self._test_backward(device=torch.device('cpu'), contiguous=False)
-
-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
-    def test_forward_cuda_contiguous(self):
-        self._test_forward(device=torch.device('cuda'), contiguous=True)
-
-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
-    def test_forward_cuda_non_contiguous(self):
-        self._test_forward(device=torch.device('cuda'), contiguous=False)
-
-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
-    def test_backward_cuda_contiguous(self):
-        self._test_backward(device=torch.device('cuda'), contiguous=True)
-
-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
-    def test_backward_cuda_non_contiguous(self):
-        self._test_backward(device=torch.device('cuda'), contiguous=False)
-
-    def _test_forward(self, device, contiguous):
-        pass
-
-    def _test_backward(self, device, contiguous):
-        pass
-
-
-class RoIOpTester(OpTester):
-    def _test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs):
+class RoIOpTester(ABC):
+    dtype = torch.float64
+
+    @pytest.mark.parametrize('device', cpu_and_gpu())
+    @pytest.mark.parametrize('contiguous', (True, False))
+    def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs):
         x_dtype = self.dtype if x_dtype is None else x_dtype
         rois_dtype = self.dtype if rois_dtype is None else rois_dtype
         pool_size = 5
@@ -74,14 +38,16 @@ class RoIOpTester(OpTester):
         pool_h, pool_w = pool_size, pool_size
         y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs)
         # the following should be true whether we're running an autocast test or not.
-        self.assertTrue(y.dtype == x.dtype)
+        assert y.dtype == x.dtype
         gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1,
                                 sampling_ratio=-1, device=device, dtype=self.dtype, **kwargs)
         tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
         torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)

-    def _test_backward(self, device, contiguous):
+    @pytest.mark.parametrize('device', cpu_and_gpu())
+    @pytest.mark.parametrize('contiguous', (True, False))
+    def test_backward(self, device, contiguous):
         pool_size = 2
         x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
         if not contiguous:
@@ -96,43 +62,43 @@ class RoIOpTester(OpTester):
         script_func = self.get_script_fn(rois, pool_size)
-        self.assertTrue(gradcheck(func, (x,)))
-        self.assertTrue(gradcheck(script_func, (x,)))
+        gradcheck(func, (x,))
+        gradcheck(script_func, (x,))
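The bare `gradcheck(...)` calls above need no surrounding assert: `torch.autograd.gradcheck` raises by default (`raise_exception=True`) when the numerical and analytical gradients disagree, and only returns True on success, so a failure still fails the test. A standalone check using only core PyTorch:

import torch
from torch.autograd import gradcheck

# gradcheck wants double-precision inputs; it raises on mismatch by default,
# which is why the ported tests drop the assertTrue wrapper.
x = torch.rand(3, 3, dtype=torch.float64, requires_grad=True)
gradcheck(torch.sin, (x,))  # silent on success, raises if gradients were wrong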
-    def test_boxes_shape(self):
-        self._test_boxes_shape()
+    @needs_cuda
+    @pytest.mark.parametrize('x_dtype', (torch.float, torch.half))
+    @pytest.mark.parametrize('rois_dtype', (torch.float, torch.half))
+    def test_autocast(self, x_dtype, rois_dtype):
+        with torch.cuda.amp.autocast():
+            self.test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)

     def _helper_boxes_shape(self, func):
         # test boxes as Tensor[N, 5]
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
             boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)
             func(a, boxes, output_size=(2, 2))

         # test boxes as List[Tensor[N, 4]]
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
             boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype)
             ops.roi_pool(a, [boxes], output_size=(2, 2))

+    @abstractmethod
     def fn(*args, **kwargs):
         pass

+    @abstractmethod
     def get_script_fn(*args, **kwargs):
         pass

+    @abstractmethod
     def expected_fn(*args, **kwargs):
         pass

-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
-    def test_autocast(self):
-        for x_dtype in (torch.float, torch.half):
-            for rois_dtype in (torch.float, torch.half):
-                with torch.cuda.amp.autocast():
-                    self._test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)
-class RoIPoolTester(RoIOpTester, unittest.TestCase):
+class TestRoiPool(RoIOpTester):
     def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
         return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)
@@ -167,11 +133,12 @@ class RoIPoolTester(RoIOpTester, unittest.TestCase):
                     y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
         return y

-    def _test_boxes_shape(self):
+    @cpu_only
+    def test_boxes_shape(self):
         self._helper_boxes_shape(ops.roi_pool)


-class PSRoIPoolTester(RoIOpTester, unittest.TestCase):
+class TestPSRoIPool(RoIOpTester):
     def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
         return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois)
@@ -184,7 +151,7 @@ class PSRoIPoolTester(RoIOpTester, unittest.TestCase):
         if device is None:
             device = torch.device("cpu")
         n_input_channels = x.size(1)
-        self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
+        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
         n_output_channels = int(n_input_channels / (pool_h * pool_w))
         y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)
@@ -211,7 +178,8 @@ class PSRoIPoolTester(RoIOpTester, unittest.TestCase):
                         y[roi_idx, c_out, i, j] = t / area
         return y

-    def _test_boxes_shape(self):
+    @cpu_only
+    def test_boxes_shape(self):
         self._helper_boxes_shape(ops.ps_roi_pool)
@@ -247,7 +215,7 @@ def bilinear_interpolate(data, y, x, snap_border=False):
     return val


-class RoIAlignTester(RoIOpTester, unittest.TestCase):
+class TestRoIAlign(RoIOpTester):
     def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs):
         return ops.RoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
                             sampling_ratio=sampling_ratio, aligned=aligned)(x, rois)
@@ -294,14 +262,36 @@ class RoIAlignTester(RoIOpTester, unittest.TestCase):
                     out_data[r, channel, i, j] = val
         return out_data

-    def _test_boxes_shape(self):
+    @cpu_only
+    def test_boxes_shape(self):
         self._helper_boxes_shape(ops.roi_align)

-    def _test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs):
-        for aligned in (True, False):
-            super()._test_forward(device, contiguous, x_dtype, rois_dtype, aligned=aligned)
+    @pytest.mark.parametrize('aligned', (True, False))
+    @pytest.mark.parametrize('device', cpu_and_gpu())
+    @pytest.mark.parametrize('contiguous', (True, False))
+    def test_forward(self, device, contiguous, aligned, x_dtype=None, rois_dtype=None):
+        super().test_forward(device=device, contiguous=contiguous, x_dtype=x_dtype, rois_dtype=rois_dtype,
+                             aligned=aligned)

+    @needs_cuda
+    @pytest.mark.parametrize('aligned', (True, False))
+    @pytest.mark.parametrize('x_dtype', (torch.float, torch.half))
+    @pytest.mark.parametrize('rois_dtype', (torch.float, torch.half))
+    def test_autocast(self, aligned, x_dtype, rois_dtype):
+        with torch.cuda.amp.autocast():
+            self.test_forward(torch.device("cuda"), contiguous=False, aligned=aligned, x_dtype=x_dtype,
+                              rois_dtype=rois_dtype)

+    def _make_rois(self, img_size, num_imgs, dtype, num_rois=1000):
+        rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype)
+        rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,))  # set batch index
+        rois[:, 3:] += rois[:, 1:3]  # make sure boxes aren't degenerate
+        return rois
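In `_make_rois`, the line `rois[:, 3:] += rois[:, 1:3]` is what rules out degenerate boxes: the columns are (batch_idx, x1, y1, x2, y2), so adding (x1, y1) to the randomly drawn, non-negative (x2, y2) guarantees x2 >= x1 and y2 >= y1. A hand-picked example:

import torch

# Worked example of the degeneracy guard (values chosen by hand for clarity).
roi = torch.tensor([[0., 3., 4., 1., 0.]])  # (batch_idx, x1, y1, x2, y2); x2 < x1 here
roi[:, 3:] += roi[:, 1:3]                   # x2 += x1, y2 += y1
print(roi)  # tensor([[0., 3., 4., 4., 4.]]) -- now x2 >= x1 and y2 >= y1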
-    def test_qroialign(self):
+    @pytest.mark.parametrize('aligned', (True, False))
+    @pytest.mark.parametrize('scale, zero_point', ((1, 0), (2, 10), (0.1, 50)))
+    @pytest.mark.parametrize('qdtype', (torch.qint8, torch.quint8, torch.qint32))
+    def test_qroialign(self, aligned, scale, zero_point, qdtype):
         """Make sure quantized version of RoIAlign is close to float version"""
         pool_size = 5
         img_size = 10
@@ -309,72 +299,64 @@ class RoIAlignTester(RoIOpTester, unittest.TestCase):
         num_imgs = 1
         dtype = torch.float

-        def make_rois(num_rois=1000):
-            rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype)
-            rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,))  # set batch index
-            rois[:, 3:] += rois[:, 1:3]  # make sure boxes aren't degenerate
-            return rois
-
-        for aligned in (True, False):
-            for scale, zero_point in ((1, 0), (2, 10), (0.1, 50)):
-                for qdtype in (torch.qint8, torch.quint8, torch.qint32):
-                    x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype)
-                    qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype)
-
-                    rois = make_rois()
-                    qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype)
-
-                    x, rois = qx.dequantize(), qrois.dequantize()  # we want to pass the same inputs
-
-                    y = ops.roi_align(
-                        x,
-                        rois,
-                        output_size=pool_size,
-                        spatial_scale=1,
-                        sampling_ratio=-1,
-                        aligned=aligned,
-                    )
-                    qy = ops.roi_align(
-                        qx,
-                        qrois,
-                        output_size=pool_size,
-                        spatial_scale=1,
-                        sampling_ratio=-1,
-                        aligned=aligned,
-                    )
-
-                    # The output qy is itself a quantized tensor and there might have been a loss of info when it was
-                    # quantized. For a fair comparison we need to quantize y as well
-                    quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype)
-
-                    try:
-                        # Ideally, we would assert this, which passes with (scale, zero) == (1, 0)
-                        self.assertTrue((qy == quantized_float_y).all())
-                    except AssertionError:
-                        # But because the computations aren't exactly the same between the 2 RoIAlign procedures, some
-                        # rounding error may lead to a difference of 2 in the output.
-                        # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44
-                        # but 45.00000001 will be rounded to 46. We make sure below that:
-                        # - such discrepancies between qy and quantized_float_y are very rare (less than 5%)
-                        # - any difference between qy and quantized_float_y is == scale
-                        diff_idx = torch.where(qy != quantized_float_y)
-                        num_diff = diff_idx[0].numel()
-                        self.assertTrue(num_diff / qy.numel() < .05)
-
-                        abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize())
-                        t_scale = torch.full_like(abs_diff, fill_value=scale)
-                        torch.testing.assert_close(abs_diff, t_scale, rtol=1e-5, atol=1e-5)
+        x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype)
+        qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype)
+
+        rois = self._make_rois(img_size, num_imgs, dtype)
+        qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype)
+
+        x, rois = qx.dequantize(), qrois.dequantize()  # we want to pass the same inputs
+
+        y = ops.roi_align(
+            x,
+            rois,
+            output_size=pool_size,
+            spatial_scale=1,
+            sampling_ratio=-1,
+            aligned=aligned,
+        )
+        qy = ops.roi_align(
+            qx,
+            qrois,
+            output_size=pool_size,
+            spatial_scale=1,
+            sampling_ratio=-1,
+            aligned=aligned,
+        )
+
+        # The output qy is itself a quantized tensor and there might have been a loss of info when it was
+        # quantized. For a fair comparison we need to quantize y as well
+        quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype)
+
+        try:
+            # Ideally, we would assert this, which passes with (scale, zero) == (1, 0)
+            assert (qy == quantized_float_y).all()
+        except AssertionError:
+            # But because the computations aren't exactly the same between the 2 RoIAlign procedures, some
+            # rounding error may lead to a difference of 2 in the output.
+            # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44
+            # but 45.00000001 will be rounded to 46. We make sure below that:
+            # - such discrepancies between qy and quantized_float_y are very rare (less than 5%)
+            # - any difference between qy and quantized_float_y is == scale
+            diff_idx = torch.where(qy != quantized_float_y)
+            num_diff = diff_idx[0].numel()
+            assert num_diff / qy.numel() < .05
+
+            abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize())
+            t_scale = torch.full_like(abs_diff, fill_value=scale)
+            torch.testing.assert_close(abs_diff, t_scale, rtol=1e-5, atol=1e-5)
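The rounding jump described in those comments is easy to reproduce in isolation: with scale=2 and zero_point=10, quantization computes round(x / 2 + 10), and round-half-to-even sends 32.5 down to 32 while anything slightly above it rounds up to 33, i.e. one full scale step after dequantization. A standalone demonstration (45.001 stands in for "45 plus a tiny float error", since float32 cannot represent 45.00000001):

import torch

# round(45/2 + 10) = round(32.5) -> 32 (half-to-even), dequantizes to (32-10)*2 = 44
# round(45.001/2 + 10) = round(32.5005) -> 33, dequantizes to (33-10)*2 = 46
x = torch.tensor([45.0, 45.001])
qx = torch.quantize_per_tensor(x, scale=2, zero_point=10, dtype=torch.qint8)
print(qx.int_repr())    # tensor([32, 33], dtype=torch.int8)
print(qx.dequantize())  # tensor([44., 46.]) -- a difference of exactly one scale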
+    def test_qroi_align_multiple_images(self):
+        dtype = torch.float
         x = torch.randint(50, 100, size=(2, 3, 10, 10)).to(dtype)
         qx = torch.quantize_per_tensor(x, scale=1, zero_point=0, dtype=torch.qint8)
-        rois = make_rois(10)
+        rois = self._make_rois(img_size=10, num_imgs=2, dtype=dtype, num_rois=10)
         qrois = torch.quantize_per_tensor(rois, scale=1, zero_point=0, dtype=torch.qint8)
-        with self.assertRaisesRegex(RuntimeError, "Only one image per batch is allowed"):
-            ops.roi_align(qx, qrois, output_size=pool_size)
+        with pytest.raises(RuntimeError, match="Only one image per batch is allowed"):
+            ops.roi_align(qx, qrois, output_size=5)
-class PSRoIAlignTester(RoIOpTester, unittest.TestCase):
+class TestPSRoIAlign(RoIOpTester):
     def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
         return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
                               sampling_ratio=sampling_ratio)(x, rois)
@@ -388,7 +370,7 @@ class PSRoIAlignTester(RoIOpTester, unittest.TestCase):
         if device is None:
             device = torch.device("cpu")
         n_input_channels = in_data.size(1)
-        self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
+        assert n_input_channels % (pool_h * pool_w) == 0, "input channels must be divisible by ph * pw"
         n_output_channels = int(n_input_channels / (pool_h * pool_w))
         out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)
@@ -421,11 +403,13 @@ class PSRoIAlignTester(RoIOpTester, unittest.TestCase):
                         out_data[r, c_out, i, j] = val
         return out_data

-    def _test_boxes_shape(self):
+    @cpu_only
+    def test_boxes_shape(self):
         self._helper_boxes_shape(ops.ps_roi_align)


-class MultiScaleRoIAlignTester(unittest.TestCase):
+@cpu_only
+class TestMultiScaleRoIAlign:
     def test_msroialign_repr(self):
         fmap_names = ['0']
         output_size = (7, 7)
@@ -436,7 +420,7 @@ class MultiScaleRoIAlignTester(unittest.TestCase):
         # Check integrity of object __repr__ attribute
         expected_string = (f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, "
                            f"sampling_ratio={sampling_ratio})")
-        self.assertEqual(t.__repr__(), expected_string)
+        assert repr(t) == expected_string


 class TestNMS:
@@ -583,7 +567,9 @@ class TestNMS:
         torch.testing.assert_close(empty, ops.batched_nms(empty, None, None, None))
-class DeformConvTester(OpTester, unittest.TestCase):
+class TestDeformConv:
+    dtype = torch.float64
+
     def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1):
         stride_h, stride_w = _pair(stride)
         pad_h, pad_w = _pair(padding)
@@ -671,12 +657,11 @@ class DeformConvTester(OpTester, unittest.TestCase):
         return x, weight, offset, mask, bias, stride, pad, dilation

-    def _test_forward(self, device, contiguous, dtype=None):
-        dtype = self.dtype if dtype is None else dtype
-        for batch_sz in [0, 33]:
-            self._test_forward_with_batchsize(device, contiguous, batch_sz, dtype)
-
-    def _test_forward_with_batchsize(self, device, contiguous, batch_sz, dtype):
+    @pytest.mark.parametrize('device', cpu_and_gpu())
+    @pytest.mark.parametrize('contiguous', (True, False))
+    @pytest.mark.parametrize('batch_sz', (0, 33))
+    def test_forward(self, device, contiguous, batch_sz, dtype=None):
+        dtype = dtype or self.dtype
         x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype)
         in_channels = 6
         out_channels = 2
@@ -704,20 +689,28 @@ class DeformConvTester(OpTester, unittest.TestCase):
             res.to(expected), expected, rtol=tol, atol=tol, msg='\nres:\n{}\nexpected:\n{}'.format(res, expected)
         )

-        # test for wrong sizes
-        with self.assertRaises(RuntimeError):
+    @cpu_only
+    def test_wrong_sizes(self):
+        in_channels = 6
+        out_channels = 2
+        kernel_size = (3, 2)
+        groups = 2
+        x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args('cpu', contiguous=True,
+                                                                            batch_sz=10, dtype=self.dtype)
+        layer = ops.DeformConv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
+                                 dilation=dilation, groups=groups)
+        with pytest.raises(RuntimeError, match="the shape of the offset"):
             wrong_offset = torch.rand_like(offset[:, :2])
-            res = layer(x, wrong_offset)
+            layer(x, wrong_offset)

-        with self.assertRaises(RuntimeError):
+        with pytest.raises(RuntimeError, match=r'mask.shape\[1\] is not valid'):
             wrong_mask = torch.rand_like(mask[:, :2])
-            res = layer(x, offset, wrong_mask)
+            layer(x, offset, wrong_mask)

-    def _test_backward(self, device, contiguous):
-        for batch_sz in [0, 33]:
-            self._test_backward_with_batchsize(device, contiguous, batch_sz)
-
-    def _test_backward_with_batchsize(self, device, contiguous, batch_sz):
+    @pytest.mark.parametrize('device', cpu_and_gpu())
+    @pytest.mark.parametrize('contiguous', (True, False))
+    @pytest.mark.parametrize('batch_sz', (0, 33))
+    def test_backward(self, device, contiguous, batch_sz):
         x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args(device, contiguous,
                                                                                     batch_sz, self.dtype)
@@ -751,47 +744,50 @@ class DeformConvTester(OpTester, unittest.TestCase):
         gradcheck(lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation),
                   (x, offset, weight, bias), nondet_tol=1e-5, fast_mode=True)
-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
-    def test_compare_cpu_cuda_grads(self):
+    @needs_cuda
+    @pytest.mark.parametrize('contiguous', (True, False))
+    def test_compare_cpu_cuda_grads(self, contiguous):
         # Test from https://github.com/pytorch/vision/issues/2598
         # Run on CUDA only
-        for contiguous in [False, True]:
-            # compare grads computed on CUDA with grads computed on CPU
-            true_cpu_grads = None
-
-            init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
-            img = torch.randn(8, 9, 1000, 110)
-            offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
-            mask = torch.rand(8, 3 * 3, 1000, 110)
-
-            if not contiguous:
-                img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
-                offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
-                mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
-                weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
-            else:
-                weight = init_weight
-
-            for d in ["cpu", "cuda"]:
-                out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d))
-                out.mean().backward()
-                if true_cpu_grads is None:
-                    true_cpu_grads = init_weight.grad
-                    self.assertTrue(true_cpu_grads is not None)
-                else:
-                    self.assertTrue(init_weight.grad is not None)
-                    res_grads = init_weight.grad.to("cpu")
-                    torch.testing.assert_close(true_cpu_grads, res_grads)
+        # compare grads computed on CUDA with grads computed on CPU
+        true_cpu_grads = None
+
+        init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
+        img = torch.randn(8, 9, 1000, 110)
+        offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
+        mask = torch.rand(8, 3 * 3, 1000, 110)
+
+        if not contiguous:
+            img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
+            offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
+            mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
+            weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
+        else:
+            weight = init_weight
+
+        for d in ["cpu", "cuda"]:
+            out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d))
+            out.mean().backward()
+            if true_cpu_grads is None:
+                true_cpu_grads = init_weight.grad
+                assert true_cpu_grads is not None
+            else:
+                assert init_weight.grad is not None
+                res_grads = init_weight.grad.to("cpu")
+                torch.testing.assert_close(true_cpu_grads, res_grads)

-    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
-    def test_autocast(self):
-        for dtype in (torch.float, torch.half):
-            with torch.cuda.amp.autocast():
-                self._test_forward(torch.device("cuda"), False, dtype=dtype)
+    @needs_cuda
+    @pytest.mark.parametrize('batch_sz', (0, 33))
+    @pytest.mark.parametrize('dtype', (torch.float, torch.half))
+    def test_autocast(self, batch_sz, dtype):
+        with torch.cuda.amp.autocast():
+            self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)
-class FrozenBNTester(unittest.TestCase):
+@cpu_only
+class TestFrozenBNT:
     def test_frozenbatchnorm2d_repr(self):
         num_features = 32
         eps = 1e-5
@@ -799,7 +795,7 @@ class FrozenBNTester(unittest.TestCase):
         # Check integrity of object __repr__ attribute
         expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})"
-        self.assertEqual(t.__repr__(), expected_string)
+        assert repr(t) == expected_string

     def test_frozenbatchnorm2d_eps(self):
         sample_size = (4, 32, 28, 28)
@@ -828,11 +824,12 @@ class FrozenBNTester(unittest.TestCase):
     def test_frozenbatchnorm2d_n_arg(self):
         """Ensure a warning is thrown when passing `n` kwarg
         (remove this when support of `n` is dropped)"""
-        self.assertWarns(DeprecationWarning, ops.misc.FrozenBatchNorm2d, 32, eps=1e-5, n=32)
+        with pytest.warns(DeprecationWarning):
+            ops.misc.FrozenBatchNorm2d(32, eps=1e-5, n=32)
-class BoxConversionTester(unittest.TestCase):
-    @staticmethod
+@cpu_only
+class TestBoxConversion:
     def _get_box_sequences():
         # Define here the argument type of `boxes` supported by region pooling operations
         box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
@@ -841,22 +838,23 @@ class BoxConversionTester(unittest.TestCase):
         box_tuple = tuple(box_list)
         return box_tensor, box_list, box_tuple

-    def test_check_roi_boxes_shape(self):
+    @pytest.mark.parametrize('box_sequence', _get_box_sequences())
+    def test_check_roi_boxes_shape(self, box_sequence):
         # Ensure common sequences of tensors are supported
-        for box_sequence in self._get_box_sequences():
-            self.assertIsNone(ops._utils.check_roi_boxes_shape(box_sequence))
+        ops._utils.check_roi_boxes_shape(box_sequence)

-    def test_convert_boxes_to_roi_format(self):
+    @pytest.mark.parametrize('box_sequence', _get_box_sequences())
+    def test_convert_boxes_to_roi_format(self, box_sequence):
         # Ensure common sequences of tensors yield the same result
         ref_tensor = None
-        for box_sequence in self._get_box_sequences():
-            if ref_tensor is None:
-                ref_tensor = box_sequence
-            else:
-                self.assertTrue(torch.equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence)))
+        if ref_tensor is None:
+            ref_tensor = box_sequence
+        else:
+            assert_equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence))
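Note that `_get_box_sequences()` is called while the class body is being evaluated, at pytest collection time, which is why it takes no `self` and the old `@staticmethod` could be dropped: `parametrize` receives the already-built sequence before any test instance exists. The same pattern in miniature (hypothetical names, not from this commit):

import pytest

class TestPattern:
    def _cases():  # no self: invoked during class-body evaluation, not on an instance
        return [1, 2, 3]

    @pytest.mark.parametrize('case', _cases())
    def test_case(self, case):
        assert case in (1, 2, 3)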
-class BoxTester(unittest.TestCase):
+@cpu_only
+class TestBox:
     def test_bbox_same(self):
         box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                    [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
@@ -917,15 +915,14 @@ class BoxTester(unittest.TestCase):
         box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
         assert_equal(box_xywh, box_tensor)

-    def test_bbox_invalid(self):
+    @pytest.mark.parametrize('inv_infmt', ["xwyh", "cxwyh"])
+    @pytest.mark.parametrize('inv_outfmt', ["xwcx", "xhwcy"])
+    def test_bbox_invalid(self, inv_infmt, inv_outfmt):
         box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                    [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

-        invalid_infmts = ["xwyh", "cxwyh"]
-        invalid_outfmts = ["xwcx", "xhwcy"]
-        for inv_infmt in invalid_infmts:
-            for inv_outfmt in invalid_outfmts:
-                self.assertRaises(ValueError, ops.box_convert, box_tensor, inv_infmt, inv_outfmt)
+        with pytest.raises(ValueError):
+            ops.box_convert(box_tensor, inv_infmt, inv_outfmt)

     def test_bbox_convert_jit(self):
         box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
@@ -943,7 +940,8 @@ class BoxTester(unittest.TestCase):
         torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)
-class BoxAreaTester(unittest.TestCase):
+@cpu_only
+class TestBoxArea:
     def test_box_area(self):
         def area_check(box, expected, tolerance=1e-4):
             out = ops.box_area(box)
@@ -971,7 +969,8 @@ class BoxAreaTester(unittest.TestCase):
         area_check(box_tensor, expected)


-class BoxIouTester(unittest.TestCase):
+@cpu_only
+class TestBoxIou:
     def test_iou(self):
         def iou_check(box, expected, tolerance=1e-4):
             out = ops.box_iou(box, box)
@@ -992,7 +991,8 @@ class BoxIouTester(unittest.TestCase):
         iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-4)


-class GenBoxIouTester(unittest.TestCase):
+@cpu_only
+class TestGenBoxIou:
     def test_gen_iou(self):
         def gen_iou_check(box, expected, tolerance=1e-4):
             out = ops.generalized_box_iou(box, box)
@@ -1014,4 +1014,4 @@ class GenBoxIouTester(unittest.TestCase):
 if __name__ == '__main__':
-    unittest.main()
+    pytest.main([__file__])
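With `unittest.main()` replaced by `pytest.main([__file__])`, `python test_ops.py` still works, and the pytest CLI can now target the parametrized cases directly. For example (standard pytest flags, nothing project-specific):

import pytest

# Equivalent CLI invocations:
#   pytest test_ops.py                    -- run the whole file
#   pytest test_ops.py -k qroialign       -- only the quantized RoIAlign tests
#   pytest test_ops.py --collect-only -q  -- list every expanded parametrization
pytest.main(["test_ops.py", "-k", "qroialign", "-v"])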