Unverified Commit 362a90f8 authored by Jiazhen Wang's avatar Jiazhen Wang Committed by GitHub
Browse files

[Feature] Add several MLU ops (#1563)



* [Feature] Add roiaware pool3d ops from mmdet3d (#1382)

* add ops (roiaware pool3d) in mmdet3d

* refactor code

* fix typo
Co-authored-by: zhouzaida <zhouzaida@163.com>

* [Feature] Add iou3d op from mmdet3d (#1356)

* add ops (iou3d) in mmdet3d

* add unit test

* refactor code

* refactor code

* refactor code

* refactor code

* refactor code
Co-authored-by: zhouzaida <zhouzaida@163.com>

* [Fix] Update test data for test_iou3d (#1427)

* Update test data for test_iou3d

* delete blank lines
Co-authored-by: Zaida Zhou <58739961+zhouzaida@users.noreply.github.com>

* [Feature] Add group points ops from mmdet3d (#1415)

* add op (group points) and its related ops (ball query and knn) in mmdet3d

* refactor code

* fix typo

* refactor code

* fix typo

* refactor code

* make input contiguous
Co-authored-by: zhouzaida <zhouzaida@163.com>

* add mmdet3d op (#1425)

Co-authored-by: zhouzaida <zhouzaida@163.com>
parent 95273020
......@@ -5,7 +5,8 @@ import pytest
import torch
import torch.nn as nn
from mmcv.device.mlu import IS_MLU, MLUDataParallel, MLUDistributedDataParallel
from mmcv.device.mlu import (IS_MLU_AVAILABLE, MLUDataParallel,
MLUDistributedDataParallel)
from mmcv.device.mlu._functions import Scatter, scatter
from mmcv.parallel import is_module_wrapper
......@@ -31,7 +32,7 @@ def test_is_module_wrapper():
model = Model()
assert not is_module_wrapper(model)
if IS_MLU:
if IS_MLU_AVAILABLE:
mludp = MLUDataParallel(model)
assert is_module_wrapper(mludp)
......@@ -51,7 +52,7 @@ def test_scatter():
assert torch.allclose(input, output)
# if the device is MLU, copy the input from CPU to MLU
if IS_MLU:
if IS_MLU_AVAILABLE:
input = torch.zeros([1, 3, 3, 3])
output = scatter(input=input, devices=[0])
assert torch.allclose(input.to('mlu'), output)
......@@ -82,7 +83,7 @@ def test_Scatter():
assert torch.allclose(input, output)
# if the device is MLU, copy the input from CPU to MLU
if IS_MLU:
if IS_MLU_AVAILABLE:
target_mlus = [0]
input = torch.zeros([1, 3, 3, 3])
outputs = Scatter.forward(target_mlus, input)
......
......@@ -3,41 +3,60 @@ import numpy as np
import pytest
import torch
from mmcv.device.mlu import IS_MLU_AVAILABLE
from mmcv.utils import IS_CUDA_AVAILABLE
@pytest.mark.skipif(
not torch.cuda.is_available(), reason='requires CUDA support')
class TestBBox(object):
def _test_bbox_overlaps(self, dtype=torch.float):
class TestBBox(object):
def _test_bbox_overlaps(self, device, dtype=torch.float):
from mmcv.ops import bbox_overlaps
b1 = torch.tensor([[1.0, 1.0, 3.0, 4.0], [2.0, 2.0, 3.0, 4.0],
[7.0, 7.0, 8.0, 8.0]]).cuda().type(dtype)
[7.0, 7.0, 8.0, 8.0]]).to(device).type(dtype)
b2 = torch.tensor([[0.0, 2.0, 2.0, 5.0], [2.0, 1.0, 3.0,
3.0]]).cuda().type(dtype)
3.0]]).to(device).type(dtype)
should_output = np.array([[0.33333334, 0.5], [0.2, 0.5], [0.0, 0.0]])
out = bbox_overlaps(b1, b2, offset=1)
assert np.allclose(out.cpu().numpy(), should_output, 1e-2)
b1 = torch.tensor([[1.0, 1.0, 3.0, 4.0], [2.0, 2.0, 3.0,
4.0]]).cuda().type(dtype)
4.0]]).to(device).type(dtype)
b2 = torch.tensor([[0.0, 2.0, 2.0, 5.0], [2.0, 1.0, 3.0,
3.0]]).cuda().type(dtype)
3.0]]).to(device).type(dtype)
should_output = np.array([0.33333334, 0.5])
out = bbox_overlaps(b1, b2, aligned=True, offset=1)
assert np.allclose(out.cpu().numpy(), should_output, 1e-2)
b1 = torch.tensor([[0.0, 0.0, 3.0, 3.0]]).cuda().type(dtype)
b1 = torch.tensor([[0.0, 0.0, 3.0, 3.0]]).cuda().type(dtype)
b1 = torch.tensor([[0.0, 0.0, 3.0, 3.0]]).to(device).type(dtype)
b2 = torch.tensor([[4.0, 0.0, 5.0, 3.0], [3.0, 0.0, 4.0, 3.0],
[2.0, 0.0, 3.0, 3.0], [1.0, 0.0, 2.0,
3.0]]).cuda().type(dtype)
3.0]]).to(device).type(dtype)
should_output = np.array([0, 0.2, 0.5, 0.5])
out = bbox_overlaps(b1, b2, offset=1)
assert np.allclose(out.cpu().numpy(), should_output, 1e-2)
def test_bbox_overlaps_float(self):
self._test_bbox_overlaps(torch.float)
def test_bbox_overlaps_half(self):
self._test_bbox_overlaps(torch.half)
@pytest.mark.parametrize('device', [
pytest.param(
'cuda',
marks=pytest.mark.skipif(
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
pytest.param(
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
def test_bbox_overlaps_float(self, device):
self._test_bbox_overlaps(device, dtype=torch.float)
@pytest.mark.parametrize('device', [
pytest.param(
'cuda',
marks=pytest.mark.skipif(
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
pytest.param(
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
def test_bbox_overlaps_half(self, device):
self._test_bbox_overlaps(device, dtype=torch.half)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from mmcv.device.mlu import IS_MLU_AVAILABLE
from mmcv.utils import IS_CUDA_AVAILABLE
_USING_PARROTS = True
try:
from parrots.autograd import gradcheck
......@@ -57,9 +61,7 @@ class Testfocalloss(object):
assert np.allclose(loss.data.cpu().numpy(), output[0], 1e-2)
assert np.allclose(x.grad.data.cpu(), np_x_grad, 1e-2)
def _test_sigmoid(self, dtype=torch.float):
if not torch.cuda.is_available():
return
def _test_sigmoid(self, device, dtype=torch.float):
from mmcv.ops import sigmoid_focal_loss
alpha = 0.25
gamma = 2.0
......@@ -68,9 +70,9 @@ class Testfocalloss(object):
np_y = np.array(case[1])
np_x_grad = np.array(output[1])
x = torch.from_numpy(np_x).cuda().type(dtype)
x = torch.from_numpy(np_x).to(device).type(dtype)
x.requires_grad_()
y = torch.from_numpy(np_y).cuda().long()
y = torch.from_numpy(np_y).to(device).long()
loss = sigmoid_focal_loss(x, y, gamma, alpha, None, 'mean')
loss.backward()
......@@ -128,11 +130,31 @@ class Testfocalloss(object):
def test_softmax_half(self):
self._test_softmax(dtype=torch.half)
def test_sigmoid_float(self):
self._test_sigmoid(dtype=torch.float)
def test_sigmoid_half(self):
self._test_sigmoid(dtype=torch.half)
@pytest.mark.parametrize('device', [
pytest.param(
'cuda',
marks=pytest.mark.skipif(
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
pytest.param(
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
def test_sigmoid_float(self, device):
self._test_sigmoid(device=device, dtype=torch.float)
@pytest.mark.parametrize('device', [
pytest.param(
'cuda',
marks=pytest.mark.skipif(
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
pytest.param(
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
def test_sigmoid_half(self, device):
self._test_sigmoid(device, dtype=torch.half)
def test_grad_softmax_float(self):
self._test_grad_softmax(dtype=torch.float)
......
......@@ -3,12 +3,23 @@ import numpy as np
import pytest
import torch
from mmcv.device.mlu import IS_MLU_AVAILABLE
from mmcv.utils import IS_CUDA_AVAILABLE
class Testnms(object):
def test_nms_allclose(self):
if not torch.cuda.is_available():
return
@pytest.mark.parametrize('device', [
pytest.param(
'cuda',
marks=pytest.mark.skipif(
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
pytest.param(
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
def test_nms_allclose(self, device):
from mmcv.ops import nms
np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
[3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
......@@ -24,7 +35,7 @@ class Testnms(object):
assert np.allclose(dets, np_dets) # test cpu
assert np.allclose(inds, np_inds) # test cpu
dets, inds = nms(
boxes.cuda(), scores.cuda(), iou_threshold=0.3, offset=0)
boxes.to(device), scores.to(device), iou_threshold=0.3, offset=0)
assert np.allclose(dets.cpu().numpy(), np_dets) # test gpu
assert np.allclose(inds.cpu().numpy(), np_inds) # test gpu
......
......@@ -3,6 +3,9 @@ import numpy as np
import pytest
import torch
from mmcv.device.mlu import IS_MLU_AVAILABLE
from mmcv.utils import IS_CUDA_AVAILABLE
_USING_PARROTS = True
try:
from parrots.autograd import gradcheck
......@@ -11,6 +14,7 @@ except ImportError:
_USING_PARROTS = False
# yapf:disable
inputs = [([[[[1., 2.], [3., 4.]]]],
[[0., 0., 0., 1., 1.]]),
([[[[1., 2.], [3., 4.]],
......@@ -39,8 +43,6 @@ sampling_ratio = 2
def _test_roialign_gradcheck(device, dtype):
if not torch.cuda.is_available() and device == 'cuda':
pytest.skip('test requires GPU')
try:
from mmcv.ops import RoIAlign
except ModuleNotFoundError:
......@@ -65,8 +67,6 @@ def _test_roialign_gradcheck(device, dtype):
def _test_roialign_allclose(device, dtype):
if not torch.cuda.is_available() and device == 'cuda':
pytest.skip('test requires GPU')
try:
from mmcv.ops import roi_align
except ModuleNotFoundError:
......@@ -75,7 +75,6 @@ def _test_roialign_allclose(device, dtype):
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
for case, output in zip(inputs, outputs):
np_input = np.array(case[0])
np_rois = np.array(case[1])
......@@ -95,8 +94,26 @@ def _test_roialign_allclose(device, dtype):
x.grad.data.type(torch.float).cpu().numpy(), np_grad, atol=1e-3)
@pytest.mark.parametrize('device', ['cuda', 'cpu'])
@pytest.mark.parametrize('dtype', [torch.float, torch.double, torch.half])
@pytest.mark.parametrize('device', [
'cpu',
pytest.param(
'cuda',
marks=pytest.mark.skipif(
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
pytest.param(
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
@pytest.mark.parametrize('dtype', [
torch.float,
pytest.param(
torch.double,
marks=pytest.mark.skipif(
IS_MLU_AVAILABLE,
reason='MLU does not support for 64-bit floating point')),
torch.half
])
def test_roialign(device, dtype):
# check double only
if dtype is torch.double:
......
......@@ -5,6 +5,9 @@ import numpy as np
import pytest
import torch
from mmcv.device.mlu import IS_MLU_AVAILABLE
from mmcv.utils import IS_CUDA_AVAILABLE
_USING_PARROTS = True
try:
from parrots.autograd import gradcheck
......@@ -131,7 +134,7 @@ grads = [
]
def _test_tinshift_gradcheck(dtype):
def _test_tinshift_gradcheck(device, dtype):
try:
from mmcv.ops import tin_shift
except ModuleNotFoundError:
......@@ -145,15 +148,15 @@ def _test_tinshift_gradcheck(dtype):
np_shift = np.array(shift)
x = torch.tensor(
np_input, dtype=dtype, device='cuda', requires_grad=True)
shift = torch.tensor(np_shift, device='cuda').int()
np_input, dtype=dtype, device=device, requires_grad=True)
shift = torch.tensor(np_shift, device=device).int()
if torch.__version__ == 'parrots':
gradcheck(tin_shift, (x, shift))
else:
gradcheck(tin_shift, (x, shift), atol=1, rtol=0.1)
def _test_tinshift_allclose(dtype):
def _test_tinshift_allclose(device, dtype):
try:
from mmcv.ops import tin_shift
except ModuleNotFoundError:
......@@ -166,8 +169,8 @@ def _test_tinshift_allclose(dtype):
np_grad = np.array(grad)
x = torch.tensor(
np_input, dtype=dtype, device='cuda', requires_grad=True)
shift = torch.tensor(np_shift, device='cuda').int()
np_input, dtype=dtype, device=device, requires_grad=True)
shift = torch.tensor(np_shift, device=device).int()
output = tin_shift(x, shift)
output.backward(torch.ones_like(output))
......@@ -177,28 +180,48 @@ def _test_tinshift_allclose(dtype):
x.grad.data.type(torch.float).cpu().numpy(), np_grad, 1e-3)
def _test_tinshift_assert(dtype):
def _test_tinshift_assert(device, dtype):
try:
from mmcv.ops import tin_shift
except ModuleNotFoundError:
pytest.skip('TINShift op is not successfully compiled')
inputs = [torch.rand(2, 3, 4, 2), torch.rand(2, 3, 4, 2)]
inputs = [
torch.rand(2, 3, 4, 2),
torch.rand(2, 3, 4, 2),
torch.rand(1, 3, 4, 2)
]
shifts = [torch.rand(2, 3), torch.rand(2, 5)]
for x, shift in zip(inputs, shifts):
x = x.cuda()
shift = shift.cuda()
x = x.to(device).type(dtype)
shift = shift.to(device).type(dtype)
# A ValueError should be raised if ops get inputs with wrong shapes.
with pytest.raises(ValueError):
tin_shift(x, shift)
@pytest.mark.skipif(
not torch.cuda.is_available(), reason='requires CUDA support')
@pytest.mark.parametrize('dtype', [torch.float, torch.double, torch.half])
def test_tinshift(dtype):
_test_tinshift_allclose(dtype=dtype)
_test_tinshift_gradcheck(dtype=dtype)
_test_tinshift_assert(dtype=dtype)
@pytest.mark.parametrize('device', [
pytest.param(
'cuda',
marks=pytest.mark.skipif(
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
pytest.param(
'mlu',
marks=pytest.mark.skipif(
not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
@pytest.mark.parametrize('dtype', [
torch.float,
pytest.param(
torch.double,
marks=pytest.mark.skipif(
IS_MLU_AVAILABLE,
reason='MLU does not support for 64-bit floating point')),
torch.half
])
def test_tinshift(device, dtype):
_test_tinshift_allclose(device=device, dtype=dtype)
_test_tinshift_gradcheck(device=device, dtype=dtype)
_test_tinshift_assert(device=device, dtype=dtype)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment