Unverified Commit dc5d0554 authored by Vasilis Vryniotis, committed by GitHub

Adding Autocast tests for all C++ Ops (#2938)

* Moving autocast test into RoIOpTester.

* Adding autocast test to DeformConvTester.

* Adding autocast test to NMSTester.

* Fixing styles.
parent b93d5ee2
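For reference, the new tests all follow the same pattern: run the op's forward pass under torch.cuda.amp.autocast() with float32/float16 inputs and compare the result against a full-precision reference, relaxing the tolerance when half precision is involved. Below is a minimal, self-contained sketch of that pattern using ops.roi_align; the function name, tensor sizes, and tolerances are illustrative assumptions, not the tester classes touched in this diff.

import torch
from torchvision import ops


def check_roi_align_autocast():
    # Full-precision CPU reference.
    x = torch.rand(1, 3, 10, 10)
    rois = torch.tensor([[0.0, 0.0, 0.0, 9.0, 9.0]])  # (batch_idx, x1, y1, x2, y2)
    expected = ops.roi_align(x, rois, output_size=(5, 5))
    for x_dtype in (torch.float, torch.half):
        for rois_dtype in (torch.float, torch.half):
            with torch.cuda.amp.autocast():
                # Under autocast the op is expected to run in float32
                # regardless of the input dtypes.
                out = ops.roi_align(x.cuda().to(x_dtype), rois.cuda().to(rois_dtype),
                                    output_size=(5, 5))
            # Looser tolerance when any input was cast down to half precision.
            tol = 1e-3 if torch.half in (x_dtype, rois_dtype) else 1e-5
            assert torch.allclose(out.cpu().float(), expected, rtol=tol, atol=tol)


if __name__ == "__main__" and torch.cuda.is_available():
    check_roi_align_autocast()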
@@ -120,6 +120,13 @@ class RoIOpTester(OpTester):
def expected_fn(*args, **kwargs):
pass
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_autocast(self):
for x_dtype in (torch.float, torch.half):
for rois_dtype in (torch.float, torch.half):
with torch.cuda.amp.autocast():
self._test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)
class RoIPoolTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
@@ -295,13 +302,6 @@ class RoIAlignTester(RoIOpTester, unittest.TestCase):
def _test_boxes_shape(self):
self._helper_boxes_shape(ops.roi_align)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_roi_align_autocast(self):
for x_dtype in (torch.float, torch.half):
for rois_dtype in (torch.float, torch.half):
with torch.cuda.amp.autocast():
self._test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)
class PSRoIAlignTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
@@ -425,7 +425,8 @@ class NMSTester(unittest.TestCase):
self.assertRaises(RuntimeError, ops.nms, torch.rand(3, 4), torch.rand(4), 0.5)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_nms_cuda(self):
def test_nms_cuda(self, dtype=torch.float64):
tol = 1e-3 if dtype is torch.half else 1e-5
err_msg = 'NMS incompatible between CPU and CUDA for IoU={}'
for iou in [0.2, 0.5, 0.8]:
@@ -437,9 +438,15 @@ class NMSTester(unittest.TestCase):
if not is_eq:
# if the indices are not the same, ensure that it's because the scores
# are duplicate
is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()])
is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()], rtol=tol, atol=tol)
self.assertTrue(is_eq, err_msg.format(iou))
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_autocast(self):
for dtype in (torch.float, torch.half):
with torch.cuda.amp.autocast():
self.test_nms_cuda(dtype=dtype)
class NewEmptyTensorTester(unittest.TestCase):
def test_new_empty_tensor(self):
@@ -492,7 +499,7 @@ class DeformConvTester(OpTester, unittest.TestCase):
out += bias.view(1, n_out_channels, 1, 1)
return out
def get_fn_args(self, device, contiguous, batch_sz):
def get_fn_args(self, device, contiguous, batch_sz, dtype):
n_in_channels = 6
n_out_channels = 2
n_weight_grps = 2
@@ -511,15 +518,15 @@ class DeformConvTester(OpTester, unittest.TestCase):
out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1
x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=self.dtype, requires_grad=True)
x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=dtype, requires_grad=True)
offset = torch.randn(batch_sz, n_offset_grps * 2 * weight_h * weight_w, out_h, out_w,
device=device, dtype=self.dtype, requires_grad=True)
device=device, dtype=dtype, requires_grad=True)
weight = torch.randn(n_out_channels, n_in_channels // n_weight_grps, weight_h, weight_w,
device=device, dtype=self.dtype, requires_grad=True)
device=device, dtype=dtype, requires_grad=True)
bias = torch.randn(n_out_channels, device=device, dtype=self.dtype, requires_grad=True)
bias = torch.randn(n_out_channels, device=device, dtype=dtype, requires_grad=True)
if not contiguous:
x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
@@ -528,26 +535,29 @@ class DeformConvTester(OpTester, unittest.TestCase):
return x, weight, offset, bias, stride, pad, dilation
def _test_forward(self, device, contiguous):
def _test_forward(self, device, contiguous, dtype=None):
dtype = self.dtype if dtype is None else dtype
for batch_sz in [0, 33]:
self._test_forward_with_batchsize(device, contiguous, batch_sz)
self._test_forward_with_batchsize(device, contiguous, batch_sz, dtype)
def _test_forward_with_batchsize(self, device, contiguous, batch_sz):
x, _, offset, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz)
def _test_forward_with_batchsize(self, device, contiguous, batch_sz, dtype):
x, _, offset, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype)
in_channels = 6
out_channels = 2
kernel_size = (3, 2)
groups = 2
layer = ops.DeformConv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
dilation=dilation, groups=groups).to(device=x.device, dtype=x.dtype)
dilation=dilation, groups=groups).to(device=x.device, dtype=dtype)
res = layer(x, offset)
weight = layer.weight.data
bias = layer.bias.data
expected = self.expected_fn(x, weight, offset, bias, stride=stride, padding=padding, dilation=dilation)
self.assertTrue(torch.allclose(res, expected), '\nres:\n{}\nexpected:\n{}'.format(res, expected))
tol = 1e-3 if dtype is torch.half else 1e-5
self.assertTrue(torch.allclose(res.to(expected.dtype), expected, rtol=tol, atol=tol),
'\nres:\n{}\nexpected:\n{}'.format(res, expected))
# test for wrong sizes
with self.assertRaises(RuntimeError):
@@ -559,7 +569,7 @@ class DeformConvTester(OpTester, unittest.TestCase):
self._test_backward_with_batchsize(device, contiguous, batch_sz)
def _test_backward_with_batchsize(self, device, contiguous, batch_sz):
x, weight, offset, bias, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz)
x, weight, offset, bias, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, self.dtype)
def func(x_, offset_, weight_, bias_):
return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation)
@@ -603,6 +613,12 @@ class DeformConvTester(OpTester, unittest.TestCase):
res_grads = init_weight.grad.to("cpu")
self.assertTrue(true_cpu_grads.allclose(res_grads))
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_autocast(self):
for dtype in (torch.float, torch.half):
with torch.cuda.amp.autocast():
self._test_forward(torch.device("cuda"), False, dtype=dtype)
class FrozenBNTester(unittest.TestCase):
def test_frozenbatchnorm2d_repr(self):