Commit 0e2f8a5c authored by limm

add v2.2.0

parent 2754cb11
@@ -51,10 +51,11 @@ class DeformConv2dFunction(Function):
     @staticmethod
     def _npu_backward(ctx, grad_output):
+        import torch_npu
         input_tensor, weight, offset_out, offset_all, sort_index_for_npu_bp = \
             ctx.saved_tensors
         grad_input, grad_weight, grad_offset_all, grad_bias = \
-            torch.npu_deformable_conv2dbk(
+            torch_npu.npu_deformable_conv2dbk(
                 input_tensor, grad_output, offset_out, weight, offset_all,
                 kernel_size=[weight.shape[3], weight.shape[2]],
                 stride=[1, 1, ctx.stride[0], ctx.stride[1]],
......
@@ -83,8 +83,9 @@ class ModulatedDeformConv2dFunction(Function):
     def _npu_backward(ctx, grad_output):
         input_tensor, weight, offset_out, offset_all, sort_index_bp = \
             ctx.saved_tensors
+        import torch_npu
         grad_input, grad_weight, grad_offset_all, grad_bias = \
-            torch.npu_deformable_conv2dbk(
+            torch_npu.npu_deformable_conv2dbk(
                 input_tensor, grad_output, offset_out, weight, offset_all,
                 kernel_size=[weight.shape[3], weight.shape[2]],
                 stride=[1, 1, ctx.stride[0], ctx.stride[1]],
......
@@ -12,7 +12,7 @@ from mmengine.registry import MODELS
 from mmengine.utils import deprecated_api_warning
 from torch.autograd.function import Function, once_differentiable
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 from ..utils import ext_loader
 ext_module = ext_loader.load_ext(
@@ -84,7 +84,7 @@ class MultiScaleDeformableAttnFunction(Function):
         Returns:
             tuple[Tensor]: Gradient of input tensors in forward.
         """
-        value, value_spatial_shapes, value_level_start_index,\
+        value, value_spatial_shapes, value_level_start_index, \
             sampling_locations, attention_weights = ctx.saved_tensors
         grad_value = torch.zeros_like(value)
         grad_sampling_loc = torch.zeros_like(sampling_locations)
@@ -364,7 +364,8 @@ class MultiScaleDeformableAttention(BaseModule):
                 f'Last dim of reference_points must be'
                 f' 2 or 4, but get {reference_points.shape[-1]} instead.')
         if ((IS_CUDA_AVAILABLE and value.is_cuda)
-                or (IS_MLU_AVAILABLE and value.is_mlu)):
+                or (IS_MLU_AVAILABLE and value.is_mlu)
+                or (IS_NPU_AVAILABLE and value.device.type == 'npu')):
             output = MultiScaleDeformableAttnFunction.apply(
                 value, spatial_shapes, level_start_index, sampling_locations,
                 attention_weights, self.im2col_step)
......
 # Copyright (c) OpenMMLab. All rights reserved.
-__version__ = '2.1.0'
+__version__ = '2.2.0'
 def parse_version_info(version_str: str, length: int = 4) -> tuple:
......
@@ -244,10 +244,12 @@ def get_extensions():
         dipu_path = os.getenv('DIPU_PATH')
         vendor_include_dirs = os.getenv('VENDOR_INCLUDE_DIRS')
         nccl_include_dirs = os.getenv('NCCL_INCLUDE_DIRS')
+        pytorch_dir = os.getenv('PYTORCH_DIR')
         include_dirs.append(dipu_root)
         include_dirs.append(diopi_path + '/include')
         include_dirs.append(dipu_path + '/dist/include')
         include_dirs.append(vendor_include_dirs)
+        include_dirs.append(pytorch_dir + 'torch/include')
         if nccl_include_dirs:
             include_dirs.append(nccl_include_dirs)
         library_dirs += [dipu_root]
@@ -395,12 +397,22 @@ def get_extensions():
     elif (os.getenv('FORCE_NPU', '0') == '1'):
         print(f'Compiling {ext_name} only with CPU and NPU')
         try:
+            import importlib
             from torch_npu.utils.cpp_extension import NpuExtension
+            extra_compile_args['cxx'] += [
+                '-D__FILENAME__=\"$$(notdir $$(abspath $$<))\"'
+            ]
+            extra_compile_args['cxx'] += [
+                '-I' + importlib.util.find_spec(
+                    'torch_npu').submodule_search_locations[0] +
+                '/include/third_party/acl/inc'
+            ]
             define_macros += [('MMCV_WITH_NPU', None)]
             extension = NpuExtension
-            if parse_version(torch.__version__) <= parse_version('2.0.0'):
+            if parse_version(torch.__version__) < parse_version('2.1.0'):
                 define_macros += [('MMCV_WITH_XLA', None)]
-            if parse_version(torch.__version__) > parse_version('2.0.0'):
+            if parse_version(torch.__version__) >= parse_version('2.1.0'):
                 define_macros += [('MMCV_WITH_KPRIVATE', None)]
         except Exception:
             raise ImportError('can not find any torch_npu')
......
@@ -3,7 +3,7 @@ import pytest
 import torch
 from mmcv.ops import ball_query
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 @pytest.mark.parametrize('device', [
@@ -14,7 +14,11 @@ from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
     pytest.param(
         'mlu',
         marks=pytest.mark.skipif(
-            not IS_MLU_AVAILABLE, reason='requires MLU support'))
+            not IS_MLU_AVAILABLE, reason='requires MLU support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
 ])
 def test_ball_query(device):
     new_xyz = torch.tensor(
@@ -59,20 +63,25 @@ def test_ball_query(device):
     assert torch.all(idx == expected_idx)
-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
-def test_stack_ball_query():
-    new_xyz = torch.tensor([[-0.0740, 1.3147, -1.3625],
-                            [-2.2769, 2.7817, -0.2334],
-                            [-0.4003, 2.4666, -0.5116],
-                            [-0.0740, 1.3147, -1.3625],
-                            [-0.0740, 1.3147, -1.3625],
-                            [-2.0289, 2.4952, -0.1708],
-                            [-2.0668, 6.0278, -0.4875],
-                            [0.4066, 1.4211, -0.2947],
-                            [-2.0289, 2.4952, -0.1708],
-                            [-2.0289, 2.4952, -0.1708]]).cuda()
-    new_xyz_batch_cnt = torch.tensor([5, 5], dtype=torch.int32).cuda()
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
+def test_stack_ball_query(device):
+    new_xyz = torch.tensor(
+        [[-0.0740, 1.3147, -1.3625], [-2.2769, 2.7817, -0.2334],
+         [-0.4003, 2.4666, -0.5116], [-0.0740, 1.3147, -1.3625],
+         [-0.0740, 1.3147, -1.3625], [-2.0289, 2.4952, -0.1708],
+         [-2.0668, 6.0278, -0.4875], [0.4066, 1.4211, -0.2947],
+         [-2.0289, 2.4952, -0.1708], [-2.0289, 2.4952, -0.1708]],
+        device=device)
+    new_xyz_batch_cnt = torch.tensor([5, 5], dtype=torch.int32, device=device)
     xyz = torch.tensor([[-0.0740, 1.3147, -1.3625], [0.5555, 1.0399, -1.3634],
                         [-0.4003, 2.4666, -0.5116], [-0.5251, 2.4379, -0.8466],
                         [-0.9691, 1.1418, -1.3733], [-0.2232, 0.9561, -1.3626],
@@ -82,15 +91,15 @@ def test_stack_ball_query():
                         [-2.0668, 6.0278, -0.4875], [-1.9304, 3.3092, 0.6610],
                         [0.0949, 1.4332, 0.3140], [-1.2879, 2.0008, -0.7791],
                         [-0.7252, 0.9611, -0.6371], [0.4066, 1.4211, -0.2947],
-                        [0.3220, 1.4447, 0.3548], [-0.9744, 2.3856,
-                                                   -1.2000]]).cuda()
-    xyz_batch_cnt = torch.tensor([10, 10], dtype=torch.int32).cuda()
+                        [0.3220, 1.4447, 0.3548], [-0.9744, 2.3856, -1.2000]],
+                       device=device)
+    xyz_batch_cnt = torch.tensor([10, 10], dtype=torch.int32, device=device)
     idx = ball_query(0, 0.2, 5, xyz, new_xyz, xyz_batch_cnt, new_xyz_batch_cnt)
-    expected_idx = torch.tensor([[0, 0, 0, 0, 0], [6, 6, 6, 6, 6],
-                                 [2, 2, 2, 2, 2], [0, 0, 0, 0, 0],
-                                 [0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
-                                 [2, 2, 2, 2, 2], [7, 7, 7, 7, 7],
-                                 [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]).cuda()
+    expected_idx = torch.tensor(
+        [[0, 0, 0, 0, 0], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2], [0, 0, 0, 0, 0],
+         [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [2, 2, 2, 2, 2], [7, 7, 7, 7, 7],
+         [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
+        device=device)
     assert torch.all(idx == expected_idx)
     xyz = xyz.double()
......
 # Copyright (c) OpenMMLab. All rights reserved.
 import numpy as np
 import pytest
 import torch
 from mmcv.ops import chamfer_distance
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
-def test_chamfer_distance():
-    pointset1 = torch.tensor(
-        [[[1.3, 9.39], [2.3, 9.39], [2.3, 10.39], [1.3, 10.39]],
-         [[1.0, 9.39], [3.0, 9.39], [3.0, 10.39], [1.0, 10.39]],
-         [[1.6, 9.99], [2.3, 9.99], [2.3, 10.39], [1.6, 10.39]]],
-        device='cuda',
-        requires_grad=True)
+def chamfer_distance_forward_groundtruth(xyz1, xyz2, dtype):
+    bs, ns, ss = xyz1.shape
+    dist1 = np.zeros((bs, ns)).astype(torch_to_np_type(dtype))
+    dist2 = np.zeros((bs, ns)).astype(torch_to_np_type(dtype))
+    idx1 = np.zeros((bs, ns)).astype('int32')
+    idx2 = np.zeros((bs, ns)).astype('int32')
+    for b1 in range(bs):
+        for n1 in range(ns):
+            x1, y1 = xyz1[b1][n1]
+            dist1[b1][n1] = 10000000
+            for n2 in range(ns):
+                x2, y2 = xyz2[b1][n2]
+                dst = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2)
+                if dist1[b1][n1] > dst:
+                    dist1[b1][n1] = dst
+                    idx1[b1][n1] = n2
+    for b1 in range(bs):
+        for n1 in range(ns):
+            x1, y1 = xyz2[b1][n1]
+            dist2[b1][n1] = 10000000
+            for n2 in range(ns):
+                x2, y2 = xyz1[b1][n2]
+                dst = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2)
+                if dist2[b1][n1] > dst:
+                    dist2[b1][n1] = dst
+                    idx2[b1][n1] = n2
+    return [dist1, dist2, idx1, idx2]
-    pointset2 = torch.tensor(
-        [[[1.0, 9.39], [3.0, 9.39], [3.0, 10.39], [1.0, 10.39]],
-         [[1.3, 9.39], [2.3, 9.39], [2.3, 10.39], [1.3, 10.39]],
-         [[1.0, 9.39], [3.0, 9.39], [3.0, 10.39], [1.0, 10.39]]],
-        device='cuda',
-        requires_grad=True)
-    expected_dist1 = torch.tensor(
-        [[0.0900, 0.4900, 0.4900, 0.0900], [0.0900, 0.4900, 0.4900, 0.0900],
-         [0.5200, 0.6500, 0.4900, 0.3600]],
-        device='cuda')
-    expected_dist2 = torch.tensor(
-        [[0.0900, 0.4900, 0.4900, 0.0900], [0.0900, 0.4900, 0.4900, 0.0900],
-         [0.7200, 0.8500, 0.4900, 0.3600]],
-        device='cuda')
+def torch_to_np_type(dtype):
+    if dtype == torch.half:
+        return np.float16
+    elif dtype == torch.float32:
+        return np.float32
-    expected_pointset1_grad = torch.tensor(
-        [[[0.6000, 0.0000], [-1.4000, 0.0000], [-1.4000, 0.0000],
-          [0.6000, 0.0000]],
-         [[-0.6000, 0.0000], [1.4000, 0.0000], [1.4000, 0.0000],
-          [-0.6000, 0.0000]],
-         [[1.2000, -0.8000], [-1.4000, -0.8000], [-1.4000, 0.0000],
-          [1.2000, 0.0000]]],
-        device='cuda')
-    expected_pointset2_grad = torch.tensor(
-        [[[-0.6000, 0.0000], [1.4000, 0.0000], [1.4000, 0.0000],
-          [-0.6000, 0.0000]],
-         [[0.6000, 0.0000], [-1.4000, 0.0000], [-1.4000, 0.0000],
-          [0.6000, 0.0000]],
-         [[0.0000, 0.0000], [0.0000, 0.0000], [2.8000, 0.8000],
-          [-2.4000, 0.8000]]],
-        device='cuda')
-    dist1, dist2, idx1, idx2 = chamfer_distance(pointset1, pointset2)
-    dist1.backward(torch.ones_like(dist1))
-    assert torch.allclose(dist1, expected_dist1, 1e-2)
-    assert torch.allclose(dist2, expected_dist2, 1e-2)
-    assert torch.allclose(pointset1.grad.data, expected_pointset1_grad, 1e-2)
-    assert torch.allclose(pointset2.grad.data, expected_pointset2_grad, 1e-2)
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
+@pytest.mark.parametrize('dtype', [torch.half, torch.float32])
+@pytest.mark.parametrize('shape', [(2, 600, 2), (2, 600, 2)])
+def test_chamfer_distance_npu_dynamic_shape(dtype, device, shape):
+    bs = shape[0]
+    ns = shape[1]
+    xyz1 = np.random.uniform(-10.0, 10.0,
+                             (bs, ns, 2)).astype(torch_to_np_type(dtype))
+    xyz2 = np.random.uniform(-10.0, 10.0,
+                             (bs, ns, 2)).astype(torch_to_np_type(dtype))
+    xyz1_npu = torch.tensor(xyz1, dtype=dtype).to(device)
+    xyz2_npu = torch.tensor(xyz2, dtype=dtype).to(device)
+    expected_output = chamfer_distance_forward_groundtruth(xyz1, xyz2, dtype)
+    output = chamfer_distance(xyz1_npu, xyz2_npu)
+    assert np.allclose(output[0].cpu().numpy(), expected_output[0], 1e-3, 1e-4)
+    assert np.allclose(output[1].cpu().numpy(), expected_output[1], 1e-3, 1e-4)
+    assert np.allclose(output[2].cpu().numpy(), expected_output[2], 1e-3, 1e-4)
+    assert np.allclose(output[3].cpu().numpy(), expected_output[3], 1e-3, 1e-4)
@@ -72,13 +72,23 @@ def test_grouping_points(dtype, device):
     assert torch.allclose(output, expected_output)
-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
 @pytest.mark.parametrize('dtype', [torch.half, torch.float, torch.double])
-def test_stack_grouping_points(dtype):
+def test_stack_grouping_points(dtype, device):
+    if device == 'npu' and dtype == torch.double:
+        return
     idx = torch.tensor([[0, 0, 0], [3, 3, 3], [8, 8, 8], [1, 1, 1], [0, 0, 0],
                         [2, 2, 2], [0, 0, 0], [6, 6, 6], [9, 9, 9], [0, 0, 0],
-                        [1, 1, 1], [0, 0, 0]]).int().cuda()
+                        [1, 1, 1], [0, 0, 0]]).int().to(device)
     features = torch.tensor([[
         0.5798, -0.7981, -0.9280, -1.3311, 1.3687, 0.9277, -0.4164, -1.8274,
         0.9268, 0.8414
@@ -103,9 +113,9 @@ def test_stack_grouping_points(dtype):
                              -0.6646, -0.6870, -0.1125, -0.2224, -0.3445,
                              -1.4049, 0.4990, -0.7037, -0.9924, 0.0386
                             ]],
-                            dtype=dtype).cuda()
-    features_batch_cnt = torch.tensor([3, 3]).int().cuda()
-    indices_batch_cnt = torch.tensor([6, 6]).int().cuda()
+                            dtype=dtype).to(device)
+    features_batch_cnt = torch.tensor([3, 3]).int().to(device)
+    indices_batch_cnt = torch.tensor([6, 6]).int().to(device)
     output = grouping_operation(features, idx, features_batch_cnt,
                                 indices_batch_cnt)
     expected_output = torch.tensor(
@@ -169,5 +179,5 @@ def test_stack_grouping_points(dtype):
          [-0.3190, -0.3190, -0.3190], [0.7798, 0.7798, 0.7798],
          [-0.3693, -0.3693, -0.3693], [-0.9457, -0.9457, -0.9457],
          [-0.2942, -0.2942, -0.2942], [-1.8527, -1.8527, -1.8527]]],
-        dtype=dtype).cuda()
+        dtype=dtype).to(device)
     assert torch.allclose(output, expected_output)
@@ -5,7 +5,7 @@ import torch
 from mmcv.ops.multi_scale_deform_attn import (
     MultiScaleDeformableAttention, MultiScaleDeformableAttnFunction,
     multi_scale_deformable_attn_pytorch)
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 _USING_PARROTS = True
 _IS_AUTOCAST_AVAILABLE = True
@@ -136,6 +136,40 @@ def test_forward_equal_with_pytorch_double():
     assert max_rel_err < 1e-15
+@pytest.mark.skipif(not IS_NPU_AVAILABLE, reason='requires NPU support')
+def test_forward_equal_with_pytorch_npu():
+    N, M, D = 6, 4, 8
+    Lq, L, P = 10000, 4, 8
+    shapes = torch.as_tensor([(60, 40), (30, 20), (16, 24), (53, 32)],
+                             dtype=torch.int32)
+    level_start_index = torch.cat((shapes.new_zeros(
+        (1, )), shapes.prod(1).cumsum(0)[:-1]))
+    S = sum((H * W).item() for H, W in shapes)
+    torch.manual_seed(3)
+    value = torch.rand(N, S, M, D) * 0.01
+    sampling_locations = torch.rand(N, Lq, M, L, P, 2)
+    attention_weights = torch.rand(N, Lq, M, L, P) + 1e-5
+    attention_weights /= attention_weights.sum(
+        -1, keepdim=True).sum(
+            -2, keepdim=True)
+    im2col_step = 2
+    output_pytorch = multi_scale_deformable_attn_pytorch(
+        value.float(), shapes, sampling_locations.float(),
+        attention_weights.float()).detach().cpu()
+    output_npu = MultiScaleDeformableAttnFunction.apply(
+        value.npu().float(), shapes.npu(), level_start_index.npu(),
+        sampling_locations.npu().float(),
+        attention_weights.npu().float(), im2col_step).detach().cpu()
+    assert torch.allclose(output_npu, output_pytorch)
+    max_abs_err = (output_npu - output_pytorch).abs().max()
+    max_rel_err = ((output_npu - output_pytorch).abs() /
+                   output_pytorch.abs()).max()
+    assert max_abs_err < 1e-18
+    assert max_rel_err < 1e-15
 @pytest.mark.parametrize('device', [
     pytest.param(
         'cuda',
@@ -303,3 +337,67 @@ def test_gradient_numerical(channels,
                                        im2col_step),
                               eps=eps,
                               atol=1e-2)
+@pytest.mark.skipif(not IS_NPU_AVAILABLE, reason='requires NPU support')
+def test_backward_equal_with_pytorch_npu():
+    N, M, D = 6, 4, 8
+    Lq, L, P = 10000, 4, 8
+    shapes = torch.as_tensor([(60, 40), (30, 20), (16, 24), (53, 32)],
+                             dtype=torch.int32)
+    level_start_index = torch.cat((shapes.new_zeros(
+        (1, )), shapes.prod(1).cumsum(0)[:-1]))
+    S = sum((H * W).item() for H, W in shapes)
+    torch.manual_seed(3)
+    value = torch.rand(N, S, M, D) * 0.01
+    sampling_locations = torch.rand(N, Lq, M, L, P, 2)
+    attention_weights = torch.rand(N, Lq, M, L, P) + 1e-5
+    attention_weights /= attention_weights.sum(
+        -1, keepdim=True).sum(
+            -2, keepdim=True)
+    im2col_step = 2
+    value.requires_grad = True
+    sampling_locations.requires_grad = True
+    attention_weights.requires_grad = True
+    output_pytorch = multi_scale_deformable_attn_pytorch(
+        value.float(), shapes, sampling_locations.float(),
+        attention_weights.float())
+    grad_output_pytorch = torch.ones_like(output_pytorch)
+    output_pytorch.backward(grad_output_pytorch)
+    grad_value = value.grad.detach().cpu()
+    grad_location = sampling_locations.grad.detach().cpu()
+    grad_attn_weight = attention_weights.grad.detach().cpu()
+    value_npu = value.npu()
+    shapes_npu = shapes.npu()
+    level_start_index_npu = level_start_index.npu()
+    sampling_locations_npu = sampling_locations.npu()
+    attention_weights_npu = attention_weights.npu()
+    output_npu = MultiScaleDeformableAttnFunction.apply(
+        value_npu.float(), shapes_npu, level_start_index_npu,
+        sampling_locations_npu.float(), attention_weights_npu.float(),
+        im2col_step)
+    grad_output_npu = torch.ones_like(output_npu)
+    output_npu.backward(grad_output_npu)
+    grad_value_npu = value_npu.grad.detach().cpu()
+    grad_location_npu = sampling_locations_npu.grad.detach().cpu()
+    grad_attn_weight_npu = attention_weights_npu.grad.detach().cpu()
+    assert torch.allclose(grad_value_npu, grad_value)
+    max_abs_err_1 = (grad_value_npu - grad_value).abs().max()
+    max_rel_err_1 = ((grad_value_npu - grad_value).abs() /
+                     grad_value.abs()).max()
+    assert max_abs_err_1 < 1e-5
+    assert max_rel_err_1 < 1e-4
+    assert torch.allclose(grad_location_npu, grad_location)
+    max_abs_err_2 = (grad_location_npu - grad_location).abs().max()
+    max_rel_err_2 = ((grad_location_npu - grad_location).abs() /
+                     grad_location.abs()).max()
+    assert max_abs_err_2 < 1e-5
+    assert max_rel_err_2 < 1e-4
+    assert torch.allclose(grad_attn_weight_npu, grad_attn_weight)
+    max_abs_err_3 = (grad_attn_weight_npu - grad_attn_weight).abs().max()
+    max_rel_err_3 = ((grad_attn_weight_npu - grad_attn_weight).abs() /
+                     grad_attn_weight.abs()).max()
+    assert max_abs_err_3 < 1e-5
+    assert max_rel_err_3 < 1e-4
@@ -3,7 +3,7 @@ import pytest
 import torch
 from mmcv.ops import rotated_feature_align
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 @pytest.mark.skipif(
@@ -17,6 +17,10 @@ from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
         'mlu',
         marks=pytest.mark.skipif(
             not IS_MLU_AVAILABLE, reason='requires MLU support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support')),
     pytest.param(
         'cpu',
         marks=pytest.mark.skipif(
......
@@ -3,12 +3,28 @@ import pytest
 import torch
 from mmcv.ops import three_interpolate
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
-@pytest.mark.parametrize('dtype', [torch.half, torch.float, torch.double])
-def test_three_interpolate(dtype):
+@pytest.mark.parametrize('dtype', [
+    torch.half, torch.float,
+    pytest.param(
+        torch.double,
+        marks=pytest.mark.skipif(
+            IS_NPU_AVAILABLE,
+            reason='NPU does not support for 64-bit floating point'))
+])
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
+def test_three_interpolate(dtype, device):
     features = torch.tensor(
         [[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],
           [3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],
@@ -20,12 +36,13 @@ def test_three_interpolate(dtype):
           [0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],
           [0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],
           [0.5814, 0.0103, 0.0000, 0.5814, 0.5814, 0.5814]]],
-        dtype=dtype).cuda()
+        dtype=dtype,
+        device=device)
-    idx = torch.tensor([[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2],
-                         [0, 1, 3]],
-                        [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4],
-                         [0, 1, 2]]]).int().cuda()
+    idx = torch.tensor(
+        [[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2], [0, 1, 3]],
+         [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4], [0, 1, 2]]],
+        device=device).int()
     weight = torch.tensor([[[3.3333e-01, 3.3333e-01, 3.3333e-01],
                             [1.0000e+00, 5.8155e-08, 2.2373e-08],
@@ -39,7 +56,8 @@ def test_three_interpolate(dtype):
                             [3.3333e-01, 3.3333e-01, 3.3333e-01],
                             [3.3333e-01, 3.3333e-01, 3.3333e-01],
                             [3.3333e-01, 3.3333e-01, 3.3333e-01]]],
-                           dtype=dtype).cuda()
+                           dtype=dtype,
+                           device=device)
     output = three_interpolate(features, idx, weight)
     expected_output = torch.tensor([[[
@@ -73,6 +91,7 @@ def test_three_interpolate(dtype):
         3.8760e-01, 1.0300e-02, 8.3569e-09,
         3.8760e-01, 3.8760e-01, 1.9723e-01
     ]]],
-                                   dtype=dtype).cuda()
+                                   dtype=dtype,
+                                   device=device)
     assert torch.allclose(output, expected_output, 1e-3, 1e-4)