OpenDAS / MMCV · Commit 0e2f8a5c

Commit 0e2f8a5c, authored Mar 27, 2025 by limm
Parent: 2754cb11
Changes: 51

Commit message: add v2.2.0

Showing 11 changed files with 269 additions and 99 deletions:

  mmcv/ops/deform_conv.py                        +2   -1
  mmcv/ops/modulated_deform_conv.py              +2   -1
  mmcv/ops/multi_scale_deform_attn.py            +4   -3
  mmcv/version.py                                +1   -1
  setup.py                                       +14  -2
  tests/test_ops/test_ball_query.py              +33  -24
  tests/test_ops/test_chamfer_distance.py        +61  -46
  tests/test_ops/test_group_points.py            +18  -8
  tests/test_ops/test_ms_deformable_attn.py      +99  -1
  tests/test_ops/test_rotated_feature_align.py   +5   -1
  tests/test_ops/test_three_interpolate.py       +30  -11
mmcv/ops/deform_conv.py  (+2, -1)

@@ -51,10 +51,11 @@ class DeformConv2dFunction(Function):
     @staticmethod
     def _npu_backward(ctx, grad_output):
+        import torch_npu
         input_tensor, weight, offset_out, offset_all, sort_index_for_npu_bp = \
             ctx.saved_tensors
         grad_input, grad_weight, grad_offset_all, grad_bias = \
-            torch.npu_deformable_conv2dbk(
+            torch_npu.npu_deformable_conv2dbk(
                 input_tensor, grad_output, offset_out, weight, offset_all,
                 kernel_size=[weight.shape[3], weight.shape[2]],
                 stride=[1, 1, ctx.stride[0], ctx.stride[1]],
                 ...
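The substantive fix in both deform-conv files is the same: the backward kernel is now looked up on the torch_npu module (torch_npu.npu_deformable_conv2dbk) rather than on torch, where newer torch_npu releases no longer appear to expose it, and the import happens lazily inside _npu_backward. A minimal sketch of that lazy optional-backend pattern (the guard below is ours, not commit code):

def _npu_backward(ctx, grad_output):
    # Deferred import: mmcv stays importable on machines without the
    # Ascend toolchain; the dependency is only paid on this code path.
    try:
        import torch_npu
    except ImportError as e:
        raise ImportError('NPU backward requires torch_npu') from e
    # ... then call torch_npu.npu_deformable_conv2dbk(...) as in the diff.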
mmcv/ops/modulated_deform_conv.py  (+2, -1)

The same torch -> torch_npu fix as in deform_conv.py:

@@ -83,8 +83,9 @@ class ModulatedDeformConv2dFunction(Function):
     def _npu_backward(ctx, grad_output):
         input_tensor, weight, offset_out, offset_all, sort_index_bp = \
             ctx.saved_tensors
+        import torch_npu
         grad_input, grad_weight, grad_offset_all, grad_bias = \
-            torch.npu_deformable_conv2dbk(
+            torch_npu.npu_deformable_conv2dbk(
                 input_tensor, grad_output, offset_out, weight, offset_all,
                 kernel_size=[weight.shape[3], weight.shape[2]],
                 stride=[1, 1, ctx.stride[0], ctx.stride[1]],
                 ...
mmcv/ops/multi_scale_deform_attn.py  (+4, -3)

@@ -12,7 +12,7 @@ from mmengine.registry import MODELS
 from mmengine.utils import deprecated_api_warning
 from torch.autograd.function import Function, once_differentiable

-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 from ..utils import ext_loader

 ext_module = ext_loader.load_ext(
...
@@ -84,7 +84,7 @@ class MultiScaleDeformableAttnFunction(Function):
         Returns:
             tuple[Tensor]: Gradient of input tensors in forward.
         """
         value, value_spatial_shapes, value_level_start_index, \
             sampling_locations, attention_weights = ctx.saved_tensors
         grad_value = torch.zeros_like(value)
         grad_sampling_loc = torch.zeros_like(sampling_locations)
         ...
@@ -364,7 +364,8 @@ class MultiScaleDeformableAttention(BaseModule):
                 f'Last dim of reference_points must be'
                 f' 2 or 4, but get {reference_points.shape[-1]} instead.')
         if ((IS_CUDA_AVAILABLE and value.is_cuda)
-                or (IS_MLU_AVAILABLE and value.is_mlu)):
+                or (IS_MLU_AVAILABLE and value.is_mlu)
+                or (IS_NPU_AVAILABLE and value.device.type == 'npu')):
             output = MultiScaleDeformableAttnFunction.apply(
                 value, spatial_shapes, level_start_index, sampling_locations,
                 attention_weights, self.im2col_step)
...
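CUDA and MLU tensors are detected through the is_cuda / is_mlu attributes, but there is no built-in is_npu, so the new branch compares value.device.type == 'npu' directly. Comparing device.type strings is the backend-agnostic spelling of the same check; a sketch with illustrative names (the availability flags of the real condition are folded out for brevity):

import torch

def uses_extension_kernel(value: torch.Tensor) -> bool:
    # Equivalent spelling of the dispatch condition above using only
    # device.type strings: 'cuda' covers value.is_cuda, 'mlu' covers
    # value.is_mlu, and 'npu' matches the newly added branch.
    return value.device.type in ('cuda', 'mlu', 'npu')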
mmcv/version.py  (+1, -1)

 # Copyright (c) OpenMMLab. All rights reserved.
-__version__ = '2.1.0'
+__version__ = '2.2.0'


 def parse_version_info(version_str: str, length: int = 4) -> tuple:
     ...
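The body of parse_version_info is unchanged and not shown in this commit. For illustration only, a sketch of what such a helper typically computes, assuming packaging is available (the real mmcv implementation may differ, e.g. in pre-release handling):

from packaging.version import parse

def parse_version_info_sketch(version_str: str, length: int = 4) -> tuple:
    # Illustrative stand-in, not the mmcv implementation.
    release = list(parse(version_str).release)[:length]
    release += [0] * (length - len(release))  # pad: '2.2.0' -> (2, 2, 0, 0)
    return tuple(release)

assert parse_version_info_sketch('2.2.0') == (2, 2, 0, 0)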
setup.py  (+14, -2)

@@ -244,10 +244,12 @@ def get_extensions():
         dipu_path = os.getenv('DIPU_PATH')
         vendor_include_dirs = os.getenv('VENDOR_INCLUDE_DIRS')
         nccl_include_dirs = os.getenv('NCCL_INCLUDE_DIRS')
+        pytorch_dir = os.getenv('PYTORCH_DIR')
         include_dirs.append(dipu_root)
         include_dirs.append(diopi_path + '/include')
         include_dirs.append(dipu_path + '/dist/include')
         include_dirs.append(vendor_include_dirs)
+        include_dirs.append(pytorch_dir + 'torch/include')
         if nccl_include_dirs:
             include_dirs.append(nccl_include_dirs)
         library_dirs += [dipu_root]
...
@@ -395,12 +397,22 @@ def get_extensions():
     elif (os.getenv('FORCE_NPU', '0') == '1'):
         print(f'Compiling {ext_name} only with CPU and NPU')
         try:
+            import importlib
             from torch_npu.utils.cpp_extension import NpuExtension
             extra_compile_args['cxx'] += [
                 '-D__FILENAME__=\"$$(notdir $$(abspath $$<))\"']
+            extra_compile_args['cxx'] += [
+                '-I' + importlib.util.find_spec(
+                    'torch_npu').submodule_search_locations[0] +
+                '/include/third_party/acl/inc']
             define_macros += [('MMCV_WITH_NPU', None)]
             extension = NpuExtension
-            if parse_version(torch.__version__) <= parse_version('2.0.0'):
+            if parse_version(torch.__version__) < parse_version('2.1.0'):
                 define_macros += [('MMCV_WITH_XLA', None)]
-            if parse_version(torch.__version__) > parse_version('2.0.0'):
+            if parse_version(torch.__version__) >= parse_version('2.1.0'):
                 define_macros += [('MMCV_WITH_KPRIVATE', None)]
         except Exception:
             raise ImportError('can not find any torch_npu')
...
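The comparison rewrite is not cosmetic: it changes where torch 2.0.x point releases land. Under the old gates, torch 2.0.1 failed <= parse_version('2.0.0') and passed > parse_version('2.0.0'), so it got MMCV_WITH_KPRIVATE; under the new gates every 2.0.x release satisfies < parse_version('2.1.0') and gets MMCV_WITH_XLA instead. A quick check with the same packaging helper setup.py already uses:

from packaging.version import parse as parse_version

for v in ('2.0.0', '2.0.1', '2.1.0'):
    old_xla = parse_version(v) <= parse_version('2.0.0')  # old gate
    new_xla = parse_version(v) < parse_version('2.1.0')   # new gate
    print(f'{v}: old->XLA={old_xla}, new->XLA={new_xla}')
# torch 2.0.1 flips from the KPRIVATE branch to the XLA branch.

Separately, note that pytorch_dir + 'torch/include' concatenates with no path separator, so PYTORCH_DIR is evidently expected to end with a trailing slash.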
tests/test_ops/test_ball_query.py  (+33, -24)

@@ -3,7 +3,7 @@ import pytest
 import torch

 from mmcv.ops import ball_query
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE


 @pytest.mark.parametrize('device', [
...
@@ -14,7 +14,11 @@ from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
     pytest.param(
         'mlu',
         marks=pytest.mark.skipif(
-            not IS_MLU_AVAILABLE, reason='requires MLU support'))
+            not IS_MLU_AVAILABLE, reason='requires MLU support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
 ])
 def test_ball_query(device):
     new_xyz = torch.tensor(
...
@@ -59,20 +63,25 @@ def test_ball_query(device):
     assert torch.all(idx == expected_idx)


-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
-def test_stack_ball_query():
-    new_xyz = torch.tensor([[-0.0740, 1.3147, -1.3625],
-                            [-2.2769, 2.7817, -0.2334],
-                            [-0.4003, 2.4666, -0.5116],
-                            [-0.0740, 1.3147, -1.3625],
-                            [-0.0740, 1.3147, -1.3625],
-                            [-2.0289, 2.4952, -0.1708],
-                            [-2.0668, 6.0278, -0.4875],
-                            [0.4066, 1.4211, -0.2947],
-                            [-2.0289, 2.4952, -0.1708],
-                            [-2.0289, 2.4952, -0.1708]]).cuda()
-    new_xyz_batch_cnt = torch.tensor([5, 5], dtype=torch.int32).cuda()
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
+def test_stack_ball_query(device):
+    new_xyz = torch.tensor(
+        [[-0.0740, 1.3147, -1.3625], [-2.2769, 2.7817, -0.2334],
+         [-0.4003, 2.4666, -0.5116], [-0.0740, 1.3147, -1.3625],
+         [-0.0740, 1.3147, -1.3625], [-2.0289, 2.4952, -0.1708],
+         [-2.0668, 6.0278, -0.4875], [0.4066, 1.4211, -0.2947],
+         [-2.0289, 2.4952, -0.1708], [-2.0289, 2.4952, -0.1708]],
+        device=device)
+    new_xyz_batch_cnt = torch.tensor([5, 5], dtype=torch.int32, device=device)
     xyz = torch.tensor([[-0.0740, 1.3147, -1.3625], [0.5555, 1.0399, -1.3634],
                         [-0.4003, 2.4666, -0.5116], [-0.5251, 2.4379, -0.8466],
                         [-0.9691, 1.1418, -1.3733], [-0.2232, 0.9561, -1.3626],
...
@@ -82,15 +91,15 @@ def test_stack_ball_query():
                         [-2.0668, 6.0278, -0.4875], [-1.9304, 3.3092, 0.6610],
                         [0.0949, 1.4332, 0.3140], [-1.2879, 2.0008, -0.7791],
                         [-0.7252, 0.9611, -0.6371], [0.4066, 1.4211, -0.2947],
                         [0.3220, 1.4447, 0.3548], [-0.9744, 2.3856,
-                                                   -1.2000]]).cuda()
-    xyz_batch_cnt = torch.tensor([10, 10], dtype=torch.int32).cuda()
+                                                   -1.2000]],
+                       device=device)
+    xyz_batch_cnt = torch.tensor([10, 10], dtype=torch.int32, device=device)
     idx = ball_query(0, 0.2, 5, xyz, new_xyz, xyz_batch_cnt, new_xyz_batch_cnt)
-    expected_idx = torch.tensor([[0, 0, 0, 0, 0], [6, 6, 6, 6, 6],
-                                 [2, 2, 2, 2, 2], [0, 0, 0, 0, 0],
-                                 [0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
-                                 [2, 2, 2, 2, 2], [7, 7, 7, 7, 7],
-                                 [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]).cuda()
+    expected_idx = torch.tensor(
+        [[0, 0, 0, 0, 0], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2], [0, 0, 0, 0, 0],
+         [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [2, 2, 2, 2, 2], [7, 7, 7, 7, 7],
+         [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
+        device=device)
     assert torch.all(idx == expected_idx)
     xyz = xyz.double()
...
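test_stack_ball_query exercises the stacked variant of the op: instead of a padded (B, N, 3) tensor, all points are concatenated into one (sum of N_i, 3) tensor and the *_batch_cnt arguments record how many rows belong to each sample, so xyz_batch_cnt = [10, 10] means rows 0-9 are sample 0 and rows 10-19 are sample 1. A sketch of the conversion (the helper name is ours):

import torch

def batched_to_stacked(points: torch.Tensor):
    # Convert padded (B, N, 3) points to the stacked layout used above,
    # returning the (B * N, 3) tensor plus the per-sample row counts that
    # the stacked ball_query takes as `xyz_batch_cnt`.
    b, n, _ = points.shape
    stacked = points.reshape(b * n, 3)
    batch_cnt = torch.full((b, ), n, dtype=torch.int32)
    return stacked, batch_cnt

stacked, cnt = batched_to_stacked(torch.randn(2, 10, 3))
assert stacked.shape == (20, 3) and cnt.tolist() == [10, 10]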
tests/test_ops/test_chamfer_distance.py  (+61, -46)

The CUDA-only test with hard-coded pointsets and gradients is replaced by a brute-force groundtruth and a parametrized dynamic-shape forward test:

 # Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
 import pytest
 import torch

 from mmcv.ops import chamfer_distance
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE


-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
-def test_chamfer_distance():
-    pointset1 = torch.tensor(
-        [[[1.3, 9.39], [2.3, 9.39], [2.3, 10.39], [1.3, 10.39]],
-         [[1.0, 9.39], [3.0, 9.39], [3.0, 10.39], [1.0, 10.39]],
-         [[1.6, 9.99], [2.3, 9.99], [2.3, 10.39], [1.6, 10.39]]],
-        device='cuda',
-        requires_grad=True)
-    pointset2 = torch.tensor(
-        [[[1.0, 9.39], [3.0, 9.39], [3.0, 10.39], [1.0, 10.39]],
-         [[1.3, 9.39], [2.3, 9.39], [2.3, 10.39], [1.3, 10.39]],
-         [[1.0, 9.39], [3.0, 9.39], [3.0, 10.39], [1.0, 10.39]]],
-        device='cuda',
-        requires_grad=True)
-    expected_dist1 = torch.tensor(
-        [[0.0900, 0.4900, 0.4900, 0.0900], [0.0900, 0.4900, 0.4900, 0.0900],
-         [0.5200, 0.6500, 0.4900, 0.3600]],
-        device='cuda')
-    expected_dist2 = torch.tensor(
-        [[0.0900, 0.4900, 0.4900, 0.0900], [0.0900, 0.4900, 0.4900, 0.0900],
-         [0.7200, 0.8500, 0.4900, 0.3600]],
-        device='cuda')
-    expected_pointset1_grad = torch.tensor(
-        [[[0.6000, 0.0000], [-1.4000, 0.0000], [-1.4000, 0.0000],
-          [0.6000, 0.0000]],
-         [[-0.6000, 0.0000], [1.4000, 0.0000], [1.4000, 0.0000],
-          [-0.6000, 0.0000]],
-         [[1.2000, -0.8000], [-1.4000, -0.8000], [-1.4000, 0.0000],
-          [1.2000, 0.0000]]],
-        device='cuda')
-    expected_pointset2_grad = torch.tensor(
-        [[[-0.6000, 0.0000], [1.4000, 0.0000], [1.4000, 0.0000],
-          [-0.6000, 0.0000]],
-         [[0.6000, 0.0000], [-1.4000, 0.0000], [-1.4000, 0.0000],
-          [0.6000, 0.0000]],
-         [[0.0000, 0.0000], [0.0000, 0.0000], [2.8000, 0.8000],
-          [-2.4000, 0.8000]]],
-        device='cuda')
-    dist1, dist2, idx1, idx2 = chamfer_distance(pointset1, pointset2)
-    dist1.backward(torch.ones_like(dist1))
-    assert torch.allclose(dist1, expected_dist1, 1e-2)
-    assert torch.allclose(dist2, expected_dist2, 1e-2)
-    assert torch.allclose(pointset1.grad.data, expected_pointset1_grad, 1e-2)
-    assert torch.allclose(pointset2.grad.data, expected_pointset2_grad, 1e-2)
+def chamfer_distance_forward_groundtruth(xyz1, xyz2, dtype):
+    bs, ns, ss = xyz1.shape
+    dist1 = np.zeros((bs, ns)).astype(torch_to_np_type(dtype))
+    dist2 = np.zeros((bs, ns)).astype(torch_to_np_type(dtype))
+    idx1 = np.zeros((bs, ns)).astype('int32')
+    idx2 = np.zeros((bs, ns)).astype('int32')
+    for b1 in range(bs):
+        for n1 in range(ns):
+            x1, y1 = xyz1[b1][n1]
+            dist1[b1][n1] = 10000000
+            for n2 in range(ns):
+                x2, y2 = xyz2[b1][n2]
+                dst = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2)
+                if dist1[b1][n1] > dst:
+                    dist1[b1][n1] = dst
+                    idx1[b1][n1] = n2
+    for b1 in range(bs):
+        for n1 in range(ns):
+            x1, y1 = xyz2[b1][n1]
+            dist2[b1][n1] = 10000000
+            for n2 in range(ns):
+                x2, y2 = xyz1[b1][n2]
+                dst = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2)
+                if dist2[b1][n1] > dst:
+                    dist2[b1][n1] = dst
+                    idx2[b1][n1] = n2
+    return [dist1, dist2, idx1, idx2]
+
+
+def torch_to_np_type(dtype):
+    if dtype == torch.half:
+        return np.float16
+    elif dtype == torch.float32:
+        return np.float32
+
+
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
+@pytest.mark.parametrize('dtype', [torch.half, torch.float32])
+@pytest.mark.parametrize('shape', [(2, 600, 2), (2, 600, 2)])
+def test_chamfer_distance_npu_dynamic_shape(dtype, device, shape):
+    bs = shape[0]
+    ns = shape[1]
+    xyz1 = np.random.uniform(-10.0, 10.0,
+                             (bs, ns, 2)).astype(torch_to_np_type(dtype))
+    xyz2 = np.random.uniform(-10.0, 10.0,
+                             (bs, ns, 2)).astype(torch_to_np_type(dtype))
+    xyz1_npu = torch.tensor(xyz1, dtype=dtype).to(device)
+    xyz2_npu = torch.tensor(xyz2, dtype=dtype).to(device)
+    expected_output = chamfer_distance_forward_groundtruth(xyz1, xyz2, dtype)
+    output = chamfer_distance(xyz1_npu, xyz2_npu)
+    assert np.allclose(output[0].cpu().numpy(), expected_output[0], 1e-3, 1e-4)
+    assert np.allclose(output[1].cpu().numpy(), expected_output[1], 1e-3, 1e-4)
+    assert np.allclose(output[2].cpu().numpy(), expected_output[2], 1e-3, 1e-4)
+    assert np.allclose(output[3].cpu().numpy(), expected_output[3], 1e-3, 1e-4)
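The new groundtruth is a deliberately simple O(ns^2) triple loop over squared 2-D distances. For larger point counts an equivalent vectorized reference (our sketch, not part of the commit) computes the same dist/idx arrays via broadcasting:

import numpy as np

def chamfer_forward_vectorized(xyz1, xyz2):
    # Broadcasted equivalent of chamfer_distance_forward_groundtruth.
    # xyz1, xyz2: (bs, ns, 2). Returns squared nearest-neighbour
    # distances and first-occurrence argmin indices in both directions.
    diff = xyz1[:, :, None, :] - xyz2[:, None, :, :]  # (bs, ns, ns, 2)
    d2 = (diff ** 2).sum(-1)                          # pairwise squared dists
    dist1, idx1 = d2.min(axis=2), d2.argmin(axis=2).astype('int32')
    dist2, idx2 = d2.min(axis=1), d2.argmin(axis=1).astype('int32')
    return [dist1, dist2, idx1, idx2]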
tests/test_ops/test_group_points.py  (+18, -8)

@@ -72,13 +72,23 @@ def test_grouping_points(dtype, device):
     assert torch.allclose(output, expected_output)


-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
 @pytest.mark.parametrize('dtype', [torch.half, torch.float, torch.double])
-def test_stack_grouping_points(dtype):
+def test_stack_grouping_points(dtype, device):
+    if device == 'npu' and dtype == torch.double:
+        return
     idx = torch.tensor([[0, 0, 0], [3, 3, 3], [8, 8, 8], [1, 1, 1], [0, 0, 0],
                         [2, 2, 2], [0, 0, 0], [6, 6, 6], [9, 9, 9], [0, 0, 0],
-                        [1, 1, 1], [0, 0, 0]]).int().cuda()
+                        [1, 1, 1], [0, 0, 0]]).int().to(device)
     features = torch.tensor([[
         0.5798, -0.7981, -0.9280, -1.3311, 1.3687, 0.9277, -0.4164, -1.8274,
         0.9268, 0.8414
...
@@ -103,9 +113,9 @@ def test_stack_grouping_points(dtype):
         -0.6646, -0.6870, -0.1125, -0.2224, -0.3445,
         -1.4049, 0.4990, -0.7037, -0.9924, 0.0386
     ]],
-                            dtype=dtype).cuda()
-    features_batch_cnt = torch.tensor([3, 3]).int().cuda()
-    indices_batch_cnt = torch.tensor([6, 6]).int().cuda()
+                            dtype=dtype).to(device)
+    features_batch_cnt = torch.tensor([3, 3]).int().to(device)
+    indices_batch_cnt = torch.tensor([6, 6]).int().to(device)
     output = grouping_operation(features, idx, features_batch_cnt,
                                 indices_batch_cnt)
     expected_output = torch.tensor(
...
@@ -169,5 +179,5 @@ def test_stack_grouping_points(dtype):
         [[-0.3190, -0.3190, -0.3190], [0.7798, 0.7798, 0.7798],
          [-0.3693, -0.3693, -0.3693], [-0.9457, -0.9457, -0.9457],
          [-0.2942, -0.2942, -0.2942], [-1.8527, -1.8527, -1.8527]]],
-        dtype=dtype).cuda()
+        dtype=dtype).to(device)
     assert torch.allclose(output, expected_output)
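The `if device == 'npu' and dtype == torch.double: return` guard silently passes the unsupported combination. A variant with the same effect (our sketch, not commit code) that shows up as a skip in the pytest report instead:

import pytest
import torch

def skip_unsupported(device: str, dtype: torch.dtype) -> None:
    # Same guard as the early `return` above, but reported as SKIPPED
    # rather than PASSED, so unsupported combinations stay visible.
    if device == 'npu' and dtype == torch.double:
        pytest.skip('NPU does not support 64-bit floating point')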
tests/test_ops/test_ms_deformable_attn.py  (+99, -1)

@@ -5,7 +5,7 @@ import torch
 from mmcv.ops.multi_scale_deform_attn import (
     MultiScaleDeformableAttention, MultiScaleDeformableAttnFunction,
     multi_scale_deformable_attn_pytorch)
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE

 _USING_PARROTS = True
 _IS_AUTOCAST_AVAILABLE = True
...
@@ -136,6 +136,40 @@ def test_forward_equal_with_pytorch_double():
     assert max_rel_err < 1e-15


+@pytest.mark.skipif(not IS_NPU_AVAILABLE, reason='requires NPU support')
+def test_forward_equal_with_pytorch_npu():
+    N, M, D = 6, 4, 8
+    Lq, L, P = 10000, 4, 8
+    shapes = torch.as_tensor([(60, 40), (30, 20), (16, 24), (53, 32)],
+                             dtype=torch.int32)
+    level_start_index = torch.cat((shapes.new_zeros(
+        (1, )), shapes.prod(1).cumsum(0)[:-1]))
+    S = sum((H * W).item() for H, W in shapes)
+
+    torch.manual_seed(3)
+    value = torch.rand(N, S, M, D) * 0.01
+    sampling_locations = torch.rand(N, Lq, M, L, P, 2)
+    attention_weights = torch.rand(N, Lq, M, L, P) + 1e-5
+    attention_weights /= attention_weights.sum(
+        -1, keepdim=True).sum(
+            -2, keepdim=True)
+    im2col_step = 2
+    output_pytorch = multi_scale_deformable_attn_pytorch(
+        value.float(), shapes, sampling_locations.float(),
+        attention_weights.float()).detach().cpu()
+
+    output_npu = MultiScaleDeformableAttnFunction.apply(
+        value.npu().float(), shapes.npu(), level_start_index.npu(),
+        sampling_locations.npu().float(), attention_weights.npu().float(),
+        im2col_step).detach().cpu()
+    assert torch.allclose(output_npu, output_pytorch)
+    max_abs_err = (output_npu - output_pytorch).abs().max()
+    max_rel_err = ((output_npu - output_pytorch).abs() /
+                   output_pytorch.abs()).max()
+    assert max_abs_err < 1e-18
+    assert max_rel_err < 1e-15
+
+
 @pytest.mark.parametrize('device', [
     pytest.param(
         'cuda',
...
@@ -303,3 +337,67 @@ def test_gradient_numerical(channels,
             im2col_step),
         eps=eps,
         atol=1e-2)
+
+
+@pytest.mark.skipif(not IS_NPU_AVAILABLE, reason='requires NPU support')
+def test_backward_equal_with_pytorch_npu():
+    N, M, D = 6, 4, 8
+    Lq, L, P = 10000, 4, 8
+    shapes = torch.as_tensor([(60, 40), (30, 20), (16, 24), (53, 32)],
+                             dtype=torch.int32)
+    level_start_index = torch.cat((shapes.new_zeros(
+        (1, )), shapes.prod(1).cumsum(0)[:-1]))
+    S = sum((H * W).item() for H, W in shapes)
+
+    torch.manual_seed(3)
+    value = torch.rand(N, S, M, D) * 0.01
+    sampling_locations = torch.rand(N, Lq, M, L, P, 2)
+    attention_weights = torch.rand(N, Lq, M, L, P) + 1e-5
+    attention_weights /= attention_weights.sum(
+        -1, keepdim=True).sum(
+            -2, keepdim=True)
+    im2col_step = 2
+    value.requires_grad = True
+    sampling_locations.requires_grad = True
+    attention_weights.requires_grad = True
+    output_pytorch = multi_scale_deformable_attn_pytorch(
+        value.float(), shapes, sampling_locations.float(),
+        attention_weights.float())
+    grad_output_pytorch = torch.ones_like(output_pytorch)
+    output_pytorch.backward(grad_output_pytorch)
+    grad_value = value.grad.detach().cpu()
+    grad_location = sampling_locations.grad.detach().cpu()
+    grad_attn_weight = attention_weights.grad.detach().cpu()
+
+    value_npu = value.npu()
+    shapes_npu = shapes.npu()
+    level_start_index_npu = level_start_index.npu()
+    sampling_locations_npu = sampling_locations.npu()
+    attention_weights_npu = attention_weights.npu()
+    output_npu = MultiScaleDeformableAttnFunction.apply(
+        value_npu.float(), shapes_npu, level_start_index_npu,
+        sampling_locations_npu.float(), attention_weights_npu.float(),
+        im2col_step)
+    grad_output_npu = torch.ones_like(output_npu)
+    output_npu.backward(grad_output_npu)
+    grad_value_npu = value_npu.grad.detach().cpu()
+    grad_location_npu = sampling_locations_npu.grad.detach().cpu()
+    grad_attn_weight_npu = attention_weights_npu.grad.detach().cpu()
+    assert torch.allclose(grad_value_npu, grad_value)
+    max_abs_err_1 = (grad_value_npu - grad_value).abs().max()
+    max_rel_err_1 = ((grad_value_npu - grad_value).abs() /
+                     grad_value.abs()).max()
+    assert max_abs_err_1 < 1e-5
+    assert max_rel_err_1 < 1e-4
+    assert torch.allclose(grad_location_npu, grad_location)
+    max_abs_err_2 = (grad_location_npu - grad_location).abs().max()
+    max_rel_err_2 = ((grad_location_npu - grad_location).abs() /
+                     grad_location.abs()).max()
+    assert max_abs_err_2 < 1e-5
+    assert max_rel_err_2 < 1e-4
+    assert torch.allclose(grad_attn_weight_npu, grad_attn_weight)
+    max_abs_err_3 = (grad_attn_weight_npu - grad_attn_weight).abs().max()
+    max_rel_err_3 = ((grad_attn_weight_npu - grad_attn_weight).abs() /
+                     grad_attn_weight.abs()).max()
+    assert max_abs_err_3 < 1e-5
+    assert max_rel_err_3 < 1e-4
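In both new tests, level_start_index is the exclusive prefix sum of the per-level token counts: for shapes (60, 40), (30, 20), (16, 24), (53, 32) the level sizes are 2400, 600, 384 and 1696, so the levels start at offsets 0, 2400, 3000 and 3384 within the flattened value of length S = 5080. Verifying the one-liner:

import torch

shapes = torch.as_tensor([(60, 40), (30, 20), (16, 24), (53, 32)],
                         dtype=torch.int32)
level_start_index = torch.cat((shapes.new_zeros(
    (1, )), shapes.prod(1).cumsum(0)[:-1]))
assert level_start_index.tolist() == [0, 2400, 3000, 3384]
assert sum((H * W).item() for H, W in shapes) == 5080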
tests/test_ops/test_rotated_feature_align.py  (+5, -1)

@@ -3,7 +3,7 @@ import pytest
 import torch

 from mmcv.ops import rotated_feature_align
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE


 @pytest.mark.skipif(
...
@@ -17,6 +17,10 @@ from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
         'mlu',
         marks=pytest.mark.skipif(
             not IS_MLU_AVAILABLE, reason='requires MLU support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support')),
     pytest.param(
         'cpu',
         marks=pytest.mark.skipif(
...
tests/test_ops/test_three_interpolate.py  (+30, -11)

@@ -3,12 +3,28 @@ import pytest
 import torch

 from mmcv.ops import three_interpolate
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE


-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
-@pytest.mark.parametrize('dtype', [torch.half, torch.float, torch.double])
-def test_three_interpolate(dtype):
+@pytest.mark.parametrize('dtype', [
+    torch.half, torch.float,
+    pytest.param(
+        torch.double,
+        marks=pytest.mark.skipif(
+            IS_NPU_AVAILABLE,
+            reason='NPU does not support for 64-bit floating point'))
+])
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
+def test_three_interpolate(dtype, device):
     features = torch.tensor(
         [[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],
           [3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],
...
@@ -20,12 +36,13 @@ def test_three_interpolate(dtype):
           [0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],
           [0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],
           [0.5814, 0.0103, 0.0000, 0.5814, 0.5814, 0.5814]]],
-        dtype=dtype).cuda()
+        dtype=dtype,
+        device=device)

-    idx = torch.tensor([[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2],
-                         [0, 1, 2], [0, 1, 3]],
-                        [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4],
-                         [0, 2, 4], [0, 1, 2]]]).int().cuda()
+    idx = torch.tensor(
+        [[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2], [0, 1, 3]],
+         [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4], [0, 1, 2]]],
+        device=device).int()

     weight = torch.tensor([[[3.3333e-01, 3.3333e-01, 3.3333e-01],
                             [1.0000e+00, 5.8155e-08, 2.2373e-08],
...
@@ -39,7 +56,8 @@ def test_three_interpolate(dtype):
                             [3.3333e-01, 3.3333e-01, 3.3333e-01],
                             [3.3333e-01, 3.3333e-01, 3.3333e-01],
                             [3.3333e-01, 3.3333e-01, 3.3333e-01]]],
-                          dtype=dtype).cuda()
+                          dtype=dtype,
+                          device=device)

     output = three_interpolate(features, idx, weight)
     expected_output = torch.tensor([[[
...
@@ -73,6 +91,7 @@ def test_three_interpolate(dtype):
         3.8760e-01, 1.0300e-02, 8.3569e-09,
         3.8760e-01, 3.8760e-01, 1.9723e-01
     ]]],
-                                   dtype=dtype).cuda()
+                                   dtype=dtype,
+                                   device=device)

     assert torch.allclose(output, expected_output, 1e-3, 1e-4)
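Two things are worth noting here. First, unlike test_group_points, the torch.double case is excluded at parametrization time with skipif(IS_NPU_AVAILABLE, ...), i.e. based on NPU availability rather than on the selected device, so on a machine with both backends the double case is skipped for 'cuda' as well. Second, the op itself computes, for each of N target points, a weighted sum of the features of its three nearest source points: output[b, c, n] = sum_k weight[b, n, k] * features[b, c, idx[b, n, k]]. A pure-PyTorch reference implementing that formula (our sketch, not part of the test suite):

import torch

def three_interpolate_reference(features, idx, weight):
    # features: (B, C, M), idx: (B, N, 3), weight: (B, N, 3).
    # output[b, c, n] = sum_k weight[b, n, k] * features[b, c, idx[b, n, k]]
    b, c, m = features.shape
    n = idx.shape[1]
    gathered = torch.gather(
        features.unsqueeze(2).expand(b, c, n, m), 3,
        idx.long().unsqueeze(1).expand(b, c, n, 3))  # (B, C, N, 3)
    return (gathered * weight.unsqueeze(1)).sum(-1)  # (B, C, N)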