Unverified Commit 834f94db authored by ckirchhoff, committed by GitHub

[Feature] Add support of some ops for Ascend device (#2594)



* [Feature]: add support for gather_points, nms_rotated and bbox_overlaps on Ascend devices

* Apply suggestions from code review

---------
Co-authored-by: Zaida Zhou <58739961+zhouzaida@users.noreply.github.com>
parent 8e2b2bf3
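As a quick orientation, the snippet below sketches how the three newly supported ops are called once this commit is in place. It is a minimal sketch, assuming an Ascend environment with `torch_npu` installed (which registers the `'npu'` device string) and MMCV built with `MMCV_WITH_NPU`; the tensor values and shapes are illustrative only.

```python
# Minimal usage sketch for the ops enabled on Ascend by this commit.
# Assumes torch_npu is installed and MMCV was built with NPU support.
import torch
import torch_npu  # noqa: F401  (registers the 'npu' device)

from mmcv.ops import bbox_overlaps, gather_points, nms_rotated

device = 'npu'

# bbox_overlaps: pairwise IoU between two sets of axis-aligned boxes
bboxes1 = torch.tensor([[0., 0., 10., 10.], [2., 2., 8., 8.]], device=device)
bboxes2 = torch.tensor([[1., 1., 9., 9.]], device=device)
ious = bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False)  # (2, 1)

# nms_rotated: NMS over (cx, cy, w, h, angle) boxes with per-box scores
dets = torch.tensor([[6., 3., 8., 7., 0.5], [3., 6., 9., 11., 0.6]],
                    device=device)
scores = torch.tensor([0.7, 0.8], device=device)
kept_dets, keep_inds = nms_rotated(dets, scores, iou_threshold=0.5)

# gather_points: pick feature columns of a (B, C, N) tensor at (B, M) indices
features = torch.rand(2, 3, 10, device=device)
idx = torch.randint(0, 10, (2, 6), dtype=torch.int32, device=device)
out = gather_points(features, idx)  # (2, 3, 6)
```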
...@@ -7,7 +7,7 @@ We implement common ops used in detection, segmentation, etc.
| ActiveRotatedFilter | √ | √ | | | |
| AssignScoreWithK | | √ | | | |
| BallQuery | | √ | | | |
-| BBoxOverlaps | | √ | √ | √ | |
+| BBoxOverlaps | | √ | √ | √ | √ |
| BorderAlign | | √ | | | |
| BoxIouRotated | √ | √ | | | |
| BoxIouQuadri | √ | √ | | | |
...@@ -25,7 +25,7 @@ We implement common ops used in detection, segmentation, etc.
| FurthestPointSample | | √ | | | |
| FurthestPointSampleWithDist | | √ | | | |
| FusedBiasLeakyrelu | | √ | | | √ |
-| GatherPoints | | √ | | | |
+| GatherPoints | | √ | | | √ |
| GroupPoints | | √ | | | |
| Iou3d | | √ | √ | | |
| KNN | | √ | | | |
...@@ -35,7 +35,7 @@ We implement common ops used in detection, segmentation, etc.
| ModulatedDeformConv2d | √ | √ | | | √ |
| MultiScaleDeformableAttn | | √ | √ | | |
| NMS | √ | √ | √ | | √ |
-| NMSRotated | √ | √ | | | |
+| NMSRotated | √ | √ | | | √ |
| NMSQuadri | √ | √ | | | |
| PixelGroup | √ | | | | |
| PointsInBoxes | √ | √ | | | |
...
...@@ -7,7 +7,7 @@ MMCV 提供了检测、分割等任务中常用的算子
| ActiveRotatedFilter | √ | √ | | | |
| AssignScoreWithK | | √ | | | |
| BallQuery | | √ | | | |
-| BBoxOverlaps | | √ | √ | √ | |
+| BBoxOverlaps | | √ | √ | √ | √ |
| BorderAlign | | √ | | | |
| BoxIouRotated | √ | √ | | | |
| BoxIouQuadri | √ | √ | | | |
...@@ -25,7 +25,7 @@ MMCV 提供了检测、分割等任务中常用的算子
| FurthestPointSample | | √ | | | |
| FurthestPointSampleWithDist | | √ | | | |
| FusedBiasLeakyrelu | | √ | | | √ |
-| GatherPoints | | √ | | | |
+| GatherPoints | | √ | | | √ |
| GroupPoints | | √ | | | |
| Iou3d | | √ | √ | | |
| KNN | | √ | | | |
...@@ -35,7 +35,7 @@ MMCV 提供了检测、分割等任务中常用的算子
| ModulatedDeformConv2d | √ | √ | | | √ |
| MultiScaleDeformableAttn | | √ | √ | | |
| NMS | √ | √ | √ | | √ |
-| NMSRotated | √ | √ | | | |
+| NMSRotated | √ | √ | | | √ |
| NMSQuadri | √ | √ | | | |
| PixelGroup | √ | | | | |
| PointsInBoxes | √ | √ | | | |
...
...@@ -12,21 +12,32 @@ Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores,
                        const float iou_threshold, const int multi_label);
#endif
+#ifdef MMCV_WITH_NPU
+Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
+                       const Tensor labels, const float iou_threshold);
+#endif
// Interface for Python
// inline is needed to prevent multiple function definitions when this header is
// included by different cpps
Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
-                   const Tensor dets_sorted, const float iou_threshold,
-                   const int multi_label) {
+                   const Tensor dets_sorted, const Tensor labels,
+                   const float iou_threshold, const int multi_label) {
  assert(dets.device().is_cuda() == scores.device().is_cuda());
  if (dets.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
-    return nms_rotated_cuda(dets, scores, order, dets_sorted, iou_threshold,
-                            multi_label);
+    return nms_rotated_cuda(dets, scores, order, dets_sorted.contiguous(),
+                            iou_threshold, multi_label);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
+  } else if (dets.device().type() == at::kXLA) {
+#ifdef MMCV_WITH_NPU
+    return nms_rotated_npu(dets, scores, labels, iou_threshold);
+#else
+    AT_ERROR("Not compiled with NPU support");
+#endif
  }
-  return nms_rotated_cpu(dets, scores, iou_threshold);
+  return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold);
}
#include "pytorch_npu_helper.hpp"
using namespace NPU_NAME_SPACE;
using namespace std;
void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
const int mode, const bool aligned, const int offset);
void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
const int mode, const bool aligned, const int offset) {
string modeStr = "iou";
if (mode == 1) {
modeStr = "iof";
}
at::Tensor bboxes = at::ones_like(bboxes2);
at::Tensor gtboxes = at::ones_like(bboxes1);
bboxes = aligned ? bboxes2.transpose(0, 1) : bboxes2;
gtboxes = aligned ? bboxes1.transpose(0, 1) : bboxes1;
OpCommand cmd;
cmd.Name("Iou")
.Input(bboxes)
.Input(gtboxes)
.Output(ious)
.Attr("mode", modeStr)
.Attr("eps", (float)offset)
.Attr("aligned", aligned)
.Run();
}
REGISTER_NPU_IMPL(bbox_overlaps_impl, bbox_overlaps_npu);
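The adapter above translates MMCV's integer `mode` flag into the `"iou"`/`"iof"` string attribute of the Ascend `Iou` operator and transposes and swaps the inputs when `aligned` is set. As a hedged reference for what the op is expected to compute in the common unaligned case, here is a small pure-PyTorch sketch; the helper name `bbox_overlaps_ref` is invented for illustration and is not part of MMCV:

```python
import torch


def bbox_overlaps_ref(bboxes1, bboxes2, mode='iou', offset=0):
    """Unaligned IoU/IoF between (N, 4) and (M, 4) boxes; illustrative only."""
    rows, cols = bboxes1.size(0), bboxes2.size(0)
    if rows * cols == 0:
        return bboxes1.new_zeros((rows, cols))
    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + offset) * (
        bboxes1[:, 3] - bboxes1[:, 1] + offset)
    area2 = (bboxes2[:, 2] - bboxes2[:, 0] + offset) * (
        bboxes2[:, 3] - bboxes2[:, 1] + offset)
    # pairwise intersection corners and clamped width/height
    lt = torch.max(bboxes1[:, None, :2], bboxes2[None, :, :2])
    rb = torch.min(bboxes1[:, None, 2:], bboxes2[None, :, 2:])
    wh = (rb - lt + offset).clamp(min=0)
    overlap = wh[..., 0] * wh[..., 1]
    if mode == 'iou':
        union = area1[:, None] + area2[None, :] - overlap
    else:  # 'iof': intersection over the areas of the first set
        union = area1[:, None]
    return overlap / union.clamp(min=1e-6)
```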
...@@ -45,11 +45,11 @@ void deform_roi_pool_backward_npu(Tensor grad_output, Tensor input, Tensor rois,
  int64_t sampling_ratio_ = (int64_t)sampling_ratio;
  OpCommand cmd;
  cmd.Name("DeformableRoiPoolGrad")
-      .Input(grad_input)
+      .Input(grad_output)
      .Input(input)
      .Input(rois)
      .Input(offset)
-      .Output(grad_output)
+      .Output(grad_input)
      .Output(grad_offset)
      .Attr("output_size", output_size)
      .Attr("spatial_scale", spatial_scale)
...
#include "pytorch_npu_helper.hpp"
using namespace NPU_NAME_SPACE;
using namespace std;
void gather_points_forward_npu(int b, int c, int n, int npoints,
const Tensor points, const Tensor idx,
Tensor out) {
// b, c, n, and npoints do not need to be passed into gatherv2,
// b, c, n, and npoints are calculated inside the operator
// gatherv2 operator in ascend needs to set axis to 2, batch_dims is 1
c10::SmallVector<int64_t, N> axis = {2};
int64_t batch_dims = 1;
OpCommand cmd;
cmd.Name("GatherV2")
.Input(points)
.Input(idx)
.Input(axis)
.Output(out)
.Attr("batch_dims", batch_dims)
.Run();
}
void gather_points_forward_impl(int b, int c, int n, int npoints,
const Tensor points, const Tensor idx,
Tensor out);
REGISTER_NPU_IMPL(gather_points_forward_impl, gather_points_forward_npu);
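The NPU path above realizes `gather_points` with Ascend's `GatherV2`, gathering along axis 2 of the `(b, c, n)` feature tensor with `batch_dims = 1`. A hedged, CPU-side equivalent in plain PyTorch (the helper name `gather_points_ref` is invented here, not an MMCV API) looks like this:

```python
import torch


def gather_points_ref(points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
    """Gather (B, C, N) features at (B, M) indices -> (B, C, M); sketch only."""
    b, c, _ = points.shape
    m = idx.shape[1]
    # broadcast the indices over the channel dimension, then gather along N
    index = idx.long().unsqueeze(1).expand(b, c, m)
    return points.gather(dim=2, index=index)


# e.g. points: (2, 3, 10), idx: (2, 6)  ->  output: (2, 3, 6)
```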
#include "pytorch_npu_helper.hpp"
using namespace NPU_NAME_SPACE;
Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
const Tensor labels, const float iou_threshold) {
auto originDtype = dets.scalar_type();
at::Tensor detsCast = dets;
at::Tensor scoresCast = scores;
if (originDtype != at::ScalarType::Float) {
detsCast = NPUNativeFunctions::npu_dtype_cast(dets, at::kFloat);
scoresCast = NPUNativeFunctions::npu_dtype_cast(scores, at::kFloat);
}
c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)};
at::Tensor selectedBox = OpPreparation::ApplyTensor(dets);
at::Tensor selectedIndex = OpPreparation::ApplyTensor(
selectedIndexSize, dets.options().dtype(at::kInt), dets);
c10::SmallVector<int64_t, N> output_sync_idx = {0, 1};
OpCommand cmd;
cmd.Sync(output_sync_idx)
.Name("RotatedNMS")
.Input(detsCast)
.Input(scoresCast)
.Input(labels)
.Output(selectedBox)
.Output(selectedIndex)
.Attr("iou_threshold", (float)iou_threshold)
.Run();
selectedIndex = NPUNativeFunctions::npu_dtype_cast(selectedIndex, at::kLong);
return selectedIndex;
}
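This kernel receives `labels` alongside the boxes and scores because the Ascend `RotatedNMS` operator covers the multi-class case directly; the dispatcher in `nms_rotated.cpp` routes NPU tensors here (the `at::kXLA` check matches the device type that `torch_npu` registered NPU tensors under at the time). A hedged usage sketch from the Python side, with illustrative values and assuming a `torch_npu` environment:

```python
import numpy as np
import torch
import torch_npu  # noqa: F401  (assumed Ascend environment)

from mmcv.ops import nms_rotated

np_boxes = np.array([[6.0, 3.0, 8.0, 7.0, 0.5], [3.0, 6.0, 9.0, 11.0, 0.6],
                     [3.0, 7.0, 10.0, 12.0, 0.3], [1.0, 4.0, 13.0, 7.0, 0.6]],
                    dtype=np.float32)
boxes = torch.from_numpy(np_boxes).to('npu')        # (cx, cy, w, h, angle)
scores = torch.tensor([0.7, 0.8, 0.5, 0.9], device='npu')
labels = torch.tensor([1.0, 0.0, 1.0, 0.0], device='npu')

# class-agnostic rotated NMS
dets, keep = nms_rotated(boxes, scores, iou_threshold=0.5)
# multi-class rotated NMS: boxes with different labels do not suppress each other
dets_ml, keep_ml = nms_rotated(boxes, scores, iou_threshold=0.5, labels=labels)
```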
...@@ -11,7 +11,6 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
  int64_t pooled_channel = 1;
  at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
      {}, rois.options().dtype(at::kInt), rois);
-
  OpCommand cmd;
  cmd.Name("RoiPoolingWithArgMax")
      .Input(input)
...@@ -27,8 +26,38 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
      .Run();
}
void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
                           Tensor grad_input, int pooled_height,
                           int pooled_width, float spatial_scale) {
  int64_t pooled_height_64 = pooled_height;
  int64_t pooled_width_64 = pooled_width;
  int64_t pooled_channel = 1;
  at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
      {}, rois.options().dtype(at::kInt), rois);
  at::Tensor x = at::ones_like(grad_input);
  OpCommand cmd;
  cmd.Name("RoiPoolingGradWithArgMax")
      .Input(grad_output)
      .Input(x)
      .Input(rois)
      .Input(roi_actual_num)
      .Input(argmax)
      .Output(grad_input)
      .Attr("pooled_h", pooled_height_64)
      .Attr("pooled_w", pooled_width_64)
      .Attr("spatial_scale_h", spatial_scale)
      .Attr("spatial_scale_w", spatial_scale)
      .Attr("pool_channel", pooled_channel)
      .Run();
}
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale);

void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale);

REGISTER_NPU_IMPL(roi_pool_forward_impl, roi_pool_forward_npu);
REGISTER_NPU_IMPL(roi_pool_backward_impl, roi_pool_backward_npu);
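With the backward kernel registered, `roi_pool` on the NPU supports autograd end to end: the forward pass produces `argmax` via `RoiPoolingWithArgMax`, and the backward pass consumes it via `RoiPoolingGradWithArgMax`. A hedged smoke-test sketch, again assuming a `torch_npu` environment; shapes and values are illustrative:

```python
import torch
import torch_npu  # noqa: F401  (assumed Ascend environment)

from mmcv.ops import roi_pool

x = torch.rand(1, 4, 16, 16, device='npu', requires_grad=True)
# each roi is (batch_index, x1, y1, x2, y2) in input coordinates
rois = torch.tensor([[0., 0., 0., 15., 15.]], device='npu')

# forward dispatches to the NPU kernel; backward exercises the new grad kernel
output = roi_pool(x, rois, (7, 7), 1.0)
output.backward(torch.ones_like(output))
assert x.grad is not None and x.grad.shape == x.shape
```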
...@@ -309,8 +309,8 @@ void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                     const int mode_flag, const bool aligned);

Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
-                   const Tensor dets_sorted, const float iou_threshold,
-                   const int multi_label);
+                   const Tensor dets_sorted, const Tensor labels,
+                   const float iou_threshold, const int multi_label);

Tensor upfirdn2d(const Tensor &input, const Tensor &kernel, int up_x, int up_y,
                 int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0,
...@@ -758,7 +758,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
        py::arg("mode_flag"), py::arg("aligned"));
  m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes", py::arg("dets"),
        py::arg("scores"), py::arg("order"), py::arg("dets_sorted"),
-        py::arg("iou_threshold"), py::arg("multi_label"));
+        py::arg("labels"), py::arg("iou_threshold"), py::arg("multi_label"));
  m.def("ball_query_forward", &ball_query_forward, "ball_query_forward",
        py::arg("new_xyz_tensor"), py::arg("xyz_tensor"), py::arg("idx_tensor"),
        py::arg("b"), py::arg("n"), py::arg("m"), py::arg("min_radius"),
...
...@@ -406,6 +406,19 @@ def nms_rotated(dets: Tensor,
    else:
        dets_cw = dets
    multi_label = labels is not None
+    if labels is None:
+        input_labels = scores.new_empty(0, dtype=torch.int)
+    else:
+        input_labels = labels
+    if dets.device.type == 'npu':
+        order = scores.new_empty(0, dtype=torch.long)
+        keep_inds = ext_module.nms_rotated(dets_cw, scores, order, dets_cw,
+                                           input_labels, iou_threshold,
+                                           multi_label)
+        dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)),
+                         dim=1)
+        return dets, keep_inds
    if multi_label:
        dets_wl = torch.cat((dets_cw, labels.unsqueeze(1)), 1)  # type: ignore
    else:
...@@ -419,11 +432,13 @@ def nms_rotated(dets: Tensor,
            scores,
            order,
            dets_sorted,
+            input_labels,
            iou_threshold=iou_threshold,
            multi_label=multi_label)
    else:
-        keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted,
-                                           iou_threshold, multi_label)
+        keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted,
+                                           input_labels, iou_threshold,
+                                           multi_label)
    dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)),
                     dim=1)
    return dets, keep_inds
...
...@@ -3,7 +3,8 @@ import numpy as np
import pytest
import torch

-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_MPS_AVAILABLE
+from mmcv.utils import (IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_MPS_AVAILABLE,
+                        IS_NPU_AVAILABLE)


class TestBBox:
...@@ -47,7 +48,11 @@ class TestBBox:
        pytest.param(
            'mps',
            marks=pytest.mark.skipif(
-                not IS_MPS_AVAILABLE, reason='requires MPS support'))
+                not IS_MPS_AVAILABLE, reason='requires MPS support')),
+        pytest.param(
+            'npu',
+            marks=pytest.mark.skipif(
+                not IS_NPU_AVAILABLE, reason='requires NPU support'))
    ])
    def test_bbox_overlaps_float(self, device):
        self._test_bbox_overlaps(device, dtype=torch.float)
...@@ -60,7 +65,11 @@ class TestBBox:
        pytest.param(
            'mlu',
            marks=pytest.mark.skipif(
-                not IS_MLU_AVAILABLE, reason='requires MLU support'))
+                not IS_MLU_AVAILABLE, reason='requires MLU support')),
+        pytest.param(
+            'npu',
+            marks=pytest.mark.skipif(
+                not IS_NPU_AVAILABLE, reason='requires NPU support'))
    ])
    def test_bbox_overlaps_half(self, device):
        self._test_bbox_overlaps(device, dtype=torch.half)
...@@ -3,38 +3,52 @@ import pytest
import torch

from mmcv.ops import gather_points
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE


-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
-def test_gather_points():
+class TestGatherPoints:
+
+    @pytest.mark.parametrize('device', [
+        pytest.param(
+            'cuda',
+            marks=pytest.mark.skipif(
+                not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+        pytest.param(
+            'npu',
+            marks=pytest.mark.skipif(
+                not IS_NPU_AVAILABLE, reason='requires NPU support'))
+    ])
+    def test_gather_points_all_close(self, device):
+        features = torch.tensor(
+            [[[
+                -1.6095, -0.1029, -0.8876, -1.2447, -2.4031, 0.3708, -1.1586,
+                -1.4967, -0.4800, 0.2252
+            ],
+             [
+                 1.9138, 3.4979, 1.6854, 1.5631, 3.6776, 3.1154, 2.1705,
+                 2.5221, 2.0411, 3.1446
+             ],
+             [
+                 -1.4173, 0.3073, -1.4339, -1.4340, -1.2770, -0.2867, -1.4162,
+                 -1.4044, -1.4245, -1.4074
+             ]],
+             [[
+                 0.2160, 0.0842, 0.3661, -0.2749, -0.4909, -0.6066, -0.8773,
+                 -0.0745, -0.9496, 0.1434
+             ],
+              [
+                  1.3644, 1.8087, 1.6855, 1.9563, 1.2746, 1.9662, 0.9566,
+                  1.8778, 1.1437, 1.3639
+              ],
+              [
+                  -0.7172, 0.1692, 0.2241, 0.0721, -0.7540, 0.0462, -0.6227,
+                  0.3223, -0.6944, -0.5294
+              ]]],
+            dtype=torch.float,
+            device=device)
+        idx = torch.tensor([[0, 1, 4, 0, 0, 0], [0, 5, 6, 0, 0, 0]],
+                           dtype=torch.int32,
+                           device=device)
        output = gather_points(features, idx)
        expected_output = torch.tensor(
            [[[-1.6095, -0.1029, -2.4031, -1.6095, -1.6095, -1.6095],
...@@ -42,7 +56,9 @@ def test_gather_points():
              [-1.4173, 0.3073, -1.2770, -1.4173, -1.4173, -1.4173]],
             [[0.2160, -0.6066, -0.8773, 0.2160, 0.2160, 0.2160],
              [1.3644, 1.9662, 0.9566, 1.3644, 1.3644, 1.3644],
-             [-0.7172, 0.0462, -0.6227, -0.7172, -0.7172, -0.7172]]]).cuda()
+             [-0.7172, 0.0462, -0.6227, -0.7172, -0.7172, -0.7172]]],
+            dtype=torch.float,
+            device=device)
        assert torch.allclose(output, expected_output)
...
...@@ -3,13 +3,22 @@ import numpy as np
import pytest
import torch

+from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE

-@pytest.mark.skipif(
-    not torch.cuda.is_available(),
-    reason='GPU is required to test NMSRotated op')
class TestNmsRotated:

-    def test_ml_nms_rotated(self):
+    @pytest.mark.parametrize('device', [
+        pytest.param(
+            'npu',
+            marks=pytest.mark.skipif(
+                not IS_NPU_AVAILABLE, reason='requires NPU support')),
+        pytest.param(
+            'cuda',
+            marks=pytest.mark.skipif(
+                not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
+    ])
+    def test_ml_nms_rotated(self, device):
        from mmcv.ops import nms_rotated
        np_boxes = np.array(
            [[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8],
...@@ -24,8 +33,8 @@ class TestNmsRotated:
            dtype=np.float32)
        np_expect_keep_inds = np.array([3, 1, 0], dtype=np.int64)

-        boxes = torch.from_numpy(np_boxes).cuda()
-        labels = torch.from_numpy(np_labels).cuda()
+        boxes = torch.from_numpy(np_boxes).to(device)
+        labels = torch.from_numpy(np_labels).to(device)

        # test cw angle definition
        dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5, labels)
...@@ -41,7 +50,17 @@ class TestNmsRotated:
        assert np.allclose(dets.cpu().numpy()[:, :5], np_expect_dets)
        assert np.allclose(keep_inds.cpu().numpy(), np_expect_keep_inds)

-    def test_nms_rotated(self):
+    @pytest.mark.parametrize('device', [
+        pytest.param(
+            'npu',
+            marks=pytest.mark.skipif(
+                not IS_NPU_AVAILABLE, reason='requires NPU support')),
+        pytest.param(
+            'cuda',
+            marks=pytest.mark.skipif(
+                not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
+    ])
+    def test_nms_rotated(self, device):
        from mmcv.ops import nms_rotated
        np_boxes = np.array(
            [[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8],
...@@ -55,7 +74,7 @@ class TestNmsRotated:
            dtype=np.float32)
        np_expect_keep_inds = np.array([3, 1, 0], dtype=np.int64)

-        boxes = torch.from_numpy(np_boxes).cuda()
+        boxes = torch.from_numpy(np_boxes).to(device)

        # test cw angle definition
        dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5)
...
...@@ -72,7 +72,6 @@ class TestRoiPool:
        x = torch.tensor(
            np_input, dtype=dtype, device=device, requires_grad=True)
        rois = torch.tensor(np_rois, dtype=dtype, device=device)
-
        output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
        output.backward(torch.ones_like(output))
        assert np.allclose(output.data.cpu().numpy(), np_output, 1e-3)
...@@ -97,8 +96,8 @@ class TestRoiPool:
        pytest.param(
            torch.double,
            marks=pytest.mark.skipif(
-                IS_MLU_AVAILABLE,
-                reason='MLU does not support for 64-bit floating point')),
+                IS_MLU_AVAILABLE or IS_NPU_AVAILABLE,
+                reason='MLU, NPU does not support for 64-bit floating point')),
        torch.half
    ])
    def test_roipool_allclose(self, device, dtype):
...