OpenDAS / MMCV, commit 0e2f8a5c

Authored Mar 27, 2025 by limm
Parent: 2754cb11
Commit message: add v2.2.0

This commit changes 51 files in total; 20 changed files are shown below, with 622 additions and 48 deletions (+622, -48).
Changed files (20 of 51):

  mmcv/ops/csrc/pytorch/focal_loss.cpp                      +24   -6
  mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp           +36  -11
  mmcv/ops/csrc/pytorch/nms.cpp                             +19   -5
  mmcv/ops/csrc/pytorch/npu/ball_query_npu.cpp              +39   -0
  mmcv/ops/csrc/pytorch/npu/chamfer_distance_npu.cpp        +40   -0
  mmcv/ops/csrc/pytorch/npu/common_util.h                   +12   -0
  mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp              +16   -2
  mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp     +3   -1
  mmcv/ops/csrc/pytorch/npu/gather_points_npu.cpp            +5   -1
  mmcv/ops/csrc/pytorch/npu/group_points_npu.cpp             +2   -2
  mmcv/ops/csrc/pytorch/npu/ms_deform_attn_npu.cpp         +134   -0
  mmcv/ops/csrc/pytorch/npu/points_in_polygons_npu.cpp       +1   -1
  mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp                +5   -2
  mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp       +69   -0
  mmcv/ops/csrc/pytorch/npu/rotated_feature_align_npu.cpp   +52   -0
  mmcv/ops/csrc/pytorch/npu/stack_ball_query_npu.cpp        +23   -0
  mmcv/ops/csrc/pytorch/npu/stack_group_points_npu.cpp      +25   -0
  mmcv/ops/csrc/pytorch/npu/three_interpolate_npu.cpp       +59   -0
  mmcv/ops/csrc/pytorch/roi_align.cpp                       +30   -9
  mmcv/ops/csrc/pytorch/voxelization.cpp                    +28   -8
mmcv/ops/csrc/pytorch/focal_loss.cpp (+24, -6)

```diff
@@ -5,9 +5,13 @@
 #include <diopi/diopirt.h>
 #include <diopi/functions.h>
 #include <diopi/functions_mmcv.h>
+#include <torch/csrc/utils/pybind.h>
 #include "csrc_dipu/diopirt/diopirt_impl.h"
 #include "csrc_dipu/runtime/device/deviceapis.h"
+#include "csrc_dipu/utils/helpfunc.hpp"
+using dipu::VENDOR_TYPE;
 using dipu::diopi_helper::toDiopiScalar;
 using dipu::diopi_helper::toDiopiTensorHandle;
 #endif
@@ -57,9 +61,16 @@ void sigmoid_focal_loss_forward_diopi(Tensor input, Tensor target,
   auto weight_p = toDiopiTensorHandle(weight);
   auto output_p = toDiopiTensorHandle(output);
   if (reinterpret_cast<void *>(diopiSigmoidFocalLossMmcv) != nullptr) {
-    auto ret = diopiSigmoidFocalLossMmcv(ch, output_p, input_p, target_p,
-                                         weight_p, gamma, alpha);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiSigmoidFocalLossMmcv(ch, output_p, input_p, target_p,
+                                           weight_p, gamma, alpha);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiSigmoidFocalLossMmcv(ch, output_p, input_p, target_p,
+                                           weight_p, gamma, alpha);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op sigmoid_focal_loss_forward_impl";
@@ -90,9 +101,16 @@ void sigmoid_focal_loss_backward_diopi(Tensor input, Tensor target,
   auto weight_p = toDiopiTensorHandle(weight);
   auto grad_input_p = toDiopiTensorHandle(grad_input);
   if (reinterpret_cast<void *>(diopiSigmoidFocalLossBackwardMmcv) != nullptr) {
-    auto ret = diopiSigmoidFocalLossBackwardMmcv(
-        ch, grad_input_p, input_p, target_p, weight_p, gamma, alpha);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiSigmoidFocalLossBackwardMmcv(
+          ch, grad_input_p, input_p, target_p, weight_p, gamma, alpha);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiSigmoidFocalLossBackwardMmcv(
+          ch, grad_input_p, input_p, target_p, weight_p, gamma, alpha);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op sigmoid_focal_loss_forward_impl";
```
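The same change lands in modulated_deform_conv.cpp, nms.cpp, roi_align.cpp, and voxelization.cpp below: when the DIPU vendor reports "NPU", the DIOPI call now runs with the Python GIL released, so a long device-side call no longer blocks other Python threads. A minimal sketch of the pybind11 pattern being introduced (the function name is illustrative, not from the diff):

```cpp
#include <pybind11/pybind11.h>

// Hypothetical wrapper invoked from Python through pybind11.
void run_vendor_op_without_gil() {
  // The guard releases the CPython GIL on construction and reacquires it
  // on destruction, so the blocking vendor call below runs GIL-free.
  pybind11::gil_scoped_release no_gil;
  // ... blocking device-side call goes here ...
}  // GIL reacquired when `no_gil` goes out of scope
```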
mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp (+36, -11)

```diff
@@ -5,9 +5,13 @@
 #include <diopi/diopirt.h>
 #include <diopi/functions.h>
 #include <diopi/functions_mmcv.h>
+#include <torch/csrc/utils/pybind.h>
 #include "csrc_dipu/diopirt/diopirt_impl.h"
 #include "csrc_dipu/runtime/device/deviceapis.h"
+#include "csrc_dipu/utils/helpfunc.hpp"
+using dipu::VENDOR_TYPE;
 using dipu::diopi_helper::toDiopiScalar;
 using dipu::diopi_helper::toDiopiTensorHandle;
 #endif
@@ -273,11 +277,20 @@ void modulated_deform_conv_forward_diopi(
   auto output_p = toDiopiTensorHandle(output);
   auto columns_p = toDiopiTensorHandle(columns);
   if (reinterpret_cast<void *>(diopiModulatedDeformConvMmcv) != nullptr) {
-    auto ret = diopiModulatedDeformConvMmcv(
-        ch, output_p, columns_p, ones_p, input_p, weight_p, bias_p, offset_p,
-        mask_p, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,
-        dilation_h, dilation_w, group, deformable_group, with_bias);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiModulatedDeformConvMmcv(
+          ch, output_p, columns_p, ones_p, input_p, weight_p, bias_p, offset_p,
+          mask_p, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,
+          dilation_h, dilation_w, group, deformable_group, with_bias);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiModulatedDeformConvMmcv(
+          ch, output_p, columns_p, ones_p, input_p, weight_p, bias_p, offset_p,
+          mask_p, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,
+          dilation_h, dilation_w, group, deformable_group, with_bias);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op modulated_deform_conv_forward";
   auto input_cpu = input.cpu();
@@ -331,12 +344,24 @@ void modulated_deform_conv_backward_diopi(
   if (reinterpret_cast<void *>(diopiModulatedDeformConvBackwardMmcv) !=
       nullptr) {
-    auto ret = diopiModulatedDeformConvBackwardMmcv(
-        ch, grad_input_p, grad_weight_p, grad_bias_p, grad_offset_p,
-        grad_mask_p, input_p, weight_p, bias_p, ones_p, offset_p, mask_p,
-        columns_p, grad_output_p, kernel_h, kernel_w, stride_h, stride_w,
-        pad_h, pad_w, dilation_h, dilation_w, group, deformable_group,
-        with_bias);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiModulatedDeformConvBackwardMmcv(
+          ch, grad_input_p, grad_weight_p, grad_bias_p, grad_offset_p,
+          grad_mask_p, input_p, weight_p, bias_p, ones_p, offset_p, mask_p,
+          columns_p, grad_output_p, kernel_h, kernel_w, stride_h, stride_w,
+          pad_h, pad_w, dilation_h, dilation_w, group, deformable_group,
+          with_bias);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiModulatedDeformConvBackwardMmcv(
+          ch, grad_input_p, grad_weight_p, grad_bias_p, grad_offset_p,
+          grad_mask_p, input_p, weight_p, bias_p, ones_p, offset_p, mask_p,
+          columns_p, grad_output_p, kernel_h, kernel_w, stride_h, stride_w,
+          pad_h, pad_w, dilation_h, dilation_w, group, deformable_group,
+          with_bias);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op modulated_deform_conv_forward";
   auto input_cpu = input.cpu();
```
mmcv/ops/csrc/pytorch/nms.cpp (+19, -5)

```diff
@@ -5,10 +5,14 @@
 #include <diopi/diopirt.h>
 #include <diopi/functions.h>
 #include <diopi/functions_mmcv.h>
+#include <torch/csrc/utils/pybind.h>
 #include "csrc_dipu/base/basedef.h"
 #include "csrc_dipu/diopirt/diopirt_impl.h"
 #include "csrc_dipu/runtime/device/deviceapis.h"
+#include "csrc_dipu/utils/helpfunc.hpp"
+using dipu::VENDOR_TYPE;
 using dipu::diopi_helper::toDiopiScalar;
 using dipu::diopi_helper::toDiopiTensorHandle;
 #endif
@@ -45,11 +49,21 @@ Tensor nms_diopi(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
   auto scores_p = toDiopiTensorHandle(scores);
   bool is_mock_cuda = boxes.device().type() == dipu::DIPU_DEVICE_TYPE;
   if (is_mock_cuda && reinterpret_cast<void *>(diopiNmsMmcv) != nullptr) {
-    auto ret = diopiNmsMmcv(ch, outhandle, boxes_p, scores_p, iou_threshold,
-                            offset);
-    if (ret == diopiSuccess) {
-      auto tensorhandle = reinterpret_cast<Tensor *>(*outhandle);
-      return *tensorhandle;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiNmsMmcv(ch, outhandle, boxes_p, scores_p, iou_threshold,
+                              offset);
+      if (ret == diopiSuccess) {
+        auto tensorhandle = reinterpret_cast<Tensor *>(*outhandle);
+        return *tensorhandle;
+      }
+    } else {
+      auto ret = diopiNmsMmcv(ch, outhandle, boxes_p, scores_p, iou_threshold,
+                              offset);
+      if (ret == diopiSuccess) {
+        auto tensorhandle = reinterpret_cast<Tensor *>(*outhandle);
+        return *tensorhandle;
+      }
     }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op nms";
```
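Unlike the focal-loss wrapper, nms_diopi also gates the fast path on is_mock_cuda, i.e. the boxes tensor must live on DIPU's registered device type. The result then comes back through an opaque DIOPI handle rather than an output argument, which is why both branches cast the handle before returning. A distilled sketch of that round-trip, under the assumption that `outhandle` is prepared earlier in the function (as the elided context suggests):

```cpp
// The vendor op fills in the DIOPI tensor handle behind `outhandle`; on
// success the handle wraps an ATen tensor allocated by the runtime, so
// reinterpreting it recovers the result tensor.
auto ret = diopiNmsMmcv(ch, outhandle, boxes_p, scores_p, iou_threshold, offset);
if (ret == diopiSuccess) {
  auto tensorhandle = reinterpret_cast<Tensor *>(*outhandle);
  return *tensorhandle;
}
```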
mmcv/ops/csrc/pytorch/npu/ball_query_npu.cpp (new file, +39)

```cpp
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void ball_query_forward_npu(int b, int n, int m, float min_radius,
                            float max_radius, int nsample,
                            const Tensor new_xyz, const Tensor xyz,
                            Tensor idx) {
  int64_t nsample_i64 = nsample;

  // transpose new_xyz from [B, M, 3] to [M, B, 3]
  at::Tensor new_xyz_transpose = new_xyz.transpose(0, 1);

  // transpose xyz from [B, N, 3] to [B, 3, N]
  at::Tensor xyz_transpose = xyz.transpose(1, 2);

  // transpose idx from [B, M, nsample] to [M, B, nsample]
  at::Tensor idx_transpose = idx.transpose(0, 1).contiguous();

  OpCommand cmd;
  cmd.Name("BallQuery")
      .Input(xyz_transpose)
      .Input(new_xyz_transpose)
      .Output(idx_transpose)
      .Attr("min_radius", min_radius)
      .Attr("max_radius", max_radius)
      .Attr("sample_num", nsample_i64)
      .Run();

  idx_transpose = idx_transpose.transpose(0, 1).contiguous();
  idx.copy_(idx_transpose);
}

void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx);

REGISTER_NPU_IMPL(ball_query_forward_impl, ball_query_forward_npu);
```
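The transpose/contiguous/copy_ sequence at the end is easy to misread: `.contiguous()` on a transposed view materializes fresh storage, so the kernel's output never lands in the caller's `idx` directly and must be copied back. A small illustrative sketch (shapes taken from the comments above, sizes hypothetical):

```cpp
void copy_back_example() {
  at::Tensor idx = at::zeros({4, 8, 16}, at::kInt);  // [B, M, nsample], caller-owned
  at::Tensor view = idx.transpose(0, 1);     // [M, B, nsample] view, shares storage
  at::Tensor owned = view.contiguous();      // new storage; kernel output lands here
  // ... NPU kernel writes into `owned` ...
  idx.copy_(owned.transpose(0, 1));          // copy results back into caller's idx
}
```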
mmcv/ops/csrc/pytorch/npu/chamfer_distance_npu.cpp (new file, +40)

```cpp
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void chamfer_distance_forward_npu(Tensor XYZ1, Tensor XYZ2, Tensor dist1,
                                  Tensor dist2, Tensor idx1, Tensor idx2) {
  // Note: these ones_like allocations are discarded immediately; xyz1/xyz2
  // are rebound to transposed views of the inputs on the next two lines.
  at::Tensor xyz1 = at::ones_like(XYZ1);
  at::Tensor xyz2 = at::ones_like(XYZ2);
  xyz1 = XYZ1.transpose(1, 2).transpose(0, 1);
  xyz2 = XYZ2.transpose(1, 2).transpose(0, 1);
  OpCommand cmd;
  cmd.Name("ChamferDistance")
      .Input(xyz1)
      .Input(xyz2)
      .Output(dist1)
      .Output(dist2)
      .Output(idx1)
      .Output(idx2)
      .Run();
}

void chamfer_distance_backward_npu(Tensor xyz1, Tensor xyz2, Tensor idx1,
                                   Tensor idx2, Tensor grad_dist1,
                                   Tensor grad_dist2, Tensor grad_xyz1,
                                   Tensor grad_xyz2) {
  EXEC_NPU_CMD(aclnnChamferDistanceBackward, xyz1, xyz2, idx1, idx2,
               grad_dist1, grad_dist2, grad_xyz1, grad_xyz2);
}

void chamfer_distance_forward_impl(Tensor XYZ1, Tensor XYZ2, Tensor dist1,
                                   Tensor dist2, Tensor idx1, Tensor idx2);

REGISTER_NPU_IMPL(chamfer_distance_forward_impl, chamfer_distance_forward_npu);

void chamfer_distance_backward_impl(Tensor xyz1, Tensor xyz2, Tensor idx1,
                                    Tensor idx2, Tensor grad_dist1,
                                    Tensor grad_dist2, Tensor grad_xyz1,
                                    Tensor grad_xyz2);

REGISTER_NPU_IMPL(chamfer_distance_backward_impl,
                  chamfer_distance_backward_npu);
```
mmcv/ops/csrc/pytorch/npu/common_util.h (new file, +12)

```cpp
#ifndef MMCV_OPS_CSRC_COMMON__UTIL_HPP_
#define MMCV_OPS_CSRC_COMMON__UTIL_HPP_
const int SIZE = 8;

c10::SmallVector<int64_t, SIZE> array_to_vector(c10::IntArrayRef shape) {
  c10::SmallVector<int64_t, SIZE> shape_small_vec;
  for (uint64_t i = 0; i < shape.size(); i++) {
    shape_small_vec.emplace_back(shape[i]);
  }
  // Editorial fix: the committed file ends the function without a return
  // statement, which is undefined behavior for a non-void function.
  return shape_small_vec;
}

#endif  // MMCV_OPS_CSRC_COMMON__UTIL_HPP_
```
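A possible call site for the helper (hypothetical, assuming ATen headers are in scope): it copies a tensor's shape into a stack-backed SmallVector, so shapes of rank up to SIZE avoid heap allocation.

```cpp
#include <ATen/ATen.h>

void array_to_vector_example() {
  at::Tensor t = at::zeros({2, 3, 4});
  // Copies {2, 3, 4} into inline storage; no heap allocation for rank <= SIZE.
  c10::SmallVector<int64_t, SIZE> shape_vec = array_to_vector(t.sizes());
}
```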
mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp (+16, -2)

```diff
 #include "pytorch_npu_helper.hpp"

 using namespace NPU_NAME_SPACE;
 using namespace std;
@@ -100,7 +99,22 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
   c10::SmallVector<int64_t, 2> sizes = {n_batch, 1};
   at::IntArrayRef offset = at::IntArrayRef(offsets);
-  at::IntArrayRef size = at::IntArrayRef(sizes);
-  at_npu::native::custom_ops::npu_slice_out(op_output, offset, size, output);
+  at::IntArrayRef size_array = at::IntArrayRef(sizes);
+  c10::SmallVector<int64_t, 8> offsetVec;
+  for (uint64_t i = 0; i < offset.size(); i++) {
+    offsetVec.emplace_back(offset[i]);
+  }
+  c10::SmallVector<int64_t, 8> sizeVec;
+  for (uint64_t i = 0; i < size_array.size(); i++) {
+    sizeVec.emplace_back(size_array[i]);
+  }
+  OpCommand cmd2;
+  cmd2.Name("Slice")
+      .Input(op_output)
+      .Input(offsetVec)
+      .Input(sizeVec)
+      .Output(output)
+      .Run();
 }

 void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
```
mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp (+3, -1)

```diff
@@ -16,7 +16,9 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
   auto input_size = input.sizes();
   int input_length = input_size.size();
   c10::SmallVector<int64_t, SIZE> input_size_tmp;
-  input_size_tmp = array_to_small_vector(input_size);
+  for (uint64_t i = 0; i < input_size.size(); i++) {
+    input_size_tmp.emplace_back(input_size[i]);
+  }
   if (input_length > 1) {
     for (int i = 0; i < input_length; i++) {
       if (i != 1) {
```
mmcv/ops/csrc/pytorch/npu/gather_points_npu.cpp (+5, -1)

```diff
@@ -32,7 +32,11 @@ void gather_points_backward_npu(int b, int c, int n, int npoints,
     indices.unsqueeze_(0);
   }
   int64_t dim = 0;
-  at::SmallVector<int64_t, N> pad_size = array_to_small_vector(idx.sizes());
+  auto shape = idx.sizes();
+  c10::SmallVector<int64_t, 8> pad_size;
+  for (uint64_t i = 0; i < shape.size(); i++) {
+    pad_size.emplace_back(shape[i]);
+  }
   at::Tensor trans_grad_points = grad_points.transpose(1, 2).contiguous();
   at::Tensor grad_points_view = trans_grad_points.view(
       {trans_grad_points.sizes()[0] * trans_grad_points.sizes()[1],
```
mmcv/ops/csrc/pytorch/npu/group_points_npu.cpp (+2, -2)

```diff
@@ -20,7 +20,7 @@ void group_points_forward_npu(int b, int c, int n, int npoints, int nsample,
   indices = indices.view({-1});
   at::Tensor trans_features = points.transpose(1, 2);
-  at::Tensor features = NpuUtils::format_contiguous(trans_features);
+  at::Tensor features = trans_features.contiguous();
   features = features.view({b * n, c});
   OpCommand cmd;
@@ -34,7 +34,7 @@ void group_points_forward_npu(int b, int c, int n, int npoints, int nsample,
   at::Tensor output =
       out.view({b, npoints, nsample, c}).transpose(1, 3).transpose(2, 3);
-  at::Tensor res = NpuUtils::format_contiguous(output);
+  at::Tensor res = output.contiguous();
   out.copy_(res);
 }
```
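Both hunks (and the one in points_in_polygons_npu.cpp below) replace NpuUtils::format_contiguous with plain Tensor::contiguous(). A short sketch of the standard ATen semantics the replacement relies on (shapes illustrative):

```cpp
void contiguous_example() {
  at::Tensor points = at::rand({2, 16, 64});
  // transpose() returns a strided view that shares storage and is
  // generally not contiguous.
  at::Tensor trans = points.transpose(1, 2);  // [2, 64, 16] view
  // contiguous() is a no-op for already-dense tensors and otherwise
  // materializes a row-major copy a kernel can consume directly.
  at::Tensor feats = trans.contiguous();
}
```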
mmcv/ops/csrc/pytorch/npu/ms_deform_attn_npu.cpp (new file, +134)

```cpp
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

Tensor ms_deform_attn_impl_forward(const Tensor &value,
                                   const Tensor &value_spatial_shapes,
                                   const Tensor &value_level_start_index,
                                   const Tensor &sampling_locations,
                                   const Tensor &attention_weights,
                                   const int im2col_step);

void check_support(const Tensor &value, const Tensor &attention_weights) {
  TORCH_CHECK(
      (value.scalar_type() == at::kFloat || value.scalar_type() == at::kHalf),
      "Dtype of value should be float32 or float16.");
  int64_t num_heads = value.size(2);
  int64_t embed_dims = value.size(3);
  int64_t num_points = attention_weights.size(4);
  TORCH_CHECK((num_heads >= 4 && num_heads <= 8),
              "num_heads should be in the range of [4, 8]");
  TORCH_CHECK((embed_dims >= 32 && embed_dims <= 256),
              "embed_dims should be in the range of [32, 256]");
  TORCH_CHECK((num_points >= 4 && num_points <= 8),
              "num_points should be in the range of [4, 8]");
}

Tensor ms_deform_attn_forward_npu(const Tensor &value,
                                  const Tensor &value_spatial_shapes,
                                  const Tensor &value_level_start_index,
                                  const Tensor &sampling_locations,
                                  const Tensor &attention_weights,
                                  const int im2col_step) {
  check_support(value, attention_weights);
  // Normalize dtypes for the NPU kernel: float inputs to fp32, index
  // tensors to int32.
  at::Tensor value_fp32 = value;
  at::Tensor value_spatial_shapes_int32 = value_spatial_shapes;
  at::Tensor value_level_start_index_int32 = value_level_start_index;
  at::Tensor sampling_locations_fp32 = sampling_locations;
  at::Tensor attention_weights_fp32 = attention_weights;
  if (value.scalar_type() != at::kFloat) {
    value_fp32 = value.to(at::kFloat);
  }
  if (value_spatial_shapes.scalar_type() != at::kInt) {
    value_spatial_shapes_int32 = value_spatial_shapes.to(at::kInt);
  }
  if (value_level_start_index.scalar_type() != at::kInt) {
    value_level_start_index_int32 = value_level_start_index.to(at::kInt);
  }
  if (sampling_locations.scalar_type() != at::kFloat) {
    sampling_locations_fp32 = sampling_locations.to(at::kFloat);
  }
  if (attention_weights.scalar_type() != at::kFloat) {
    attention_weights_fp32 = attention_weights.to(at::kFloat);
  }

  c10::SmallVector<int64_t, 3> output_size = {value.size(0),
                                              sampling_locations.size(1),
                                              value.size(2) * value.size(3)};
  at::Tensor output = at::zeros(output_size, value_fp32.options());

  OpCommand cmd;
  cmd.Name("MultiScaleDeformableAttnFunction")
      .Input(value_fp32)
      .Input(value_spatial_shapes_int32)
      .Input(value_level_start_index_int32)
      .Input(sampling_locations_fp32)
      .Input(attention_weights_fp32)
      .Output(output)
      .Run();

  // Cast the result back to the caller's original dtype if needed.
  at::Tensor real_output = output;
  if (value.scalar_type() != at::kFloat) {
    real_output = output.to(value.scalar_type());
  }
  return real_output;
}

REGISTER_NPU_IMPL(ms_deform_attn_impl_forward, ms_deform_attn_forward_npu);

void ms_deform_attn_impl_backward(
    const Tensor &value, const Tensor &spatial_shapes,
    const Tensor &level_start_index, const Tensor &sampling_loc,
    const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value,
    Tensor &grad_sampling_loc, Tensor &grad_attn_weight,
    const int im2col_step);

void ms_deform_attn_backward_npu(
    const Tensor &value, const Tensor &spatial_shapes,
    const Tensor &level_start_index, const Tensor &sampling_loc,
    const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value,
    Tensor &grad_sampling_loc, Tensor &grad_attn_weight,
    const int im2col_step) {
  check_support(value, attn_weight);
  at::Tensor value_fp32 = value;
  at::Tensor spatial_shapes_int32 = spatial_shapes;
  at::Tensor level_start_index_int32 = level_start_index;
  // The grad kernel consumes sampling_loc with axes 4 and 5 swapped; the
  // resulting gradient is swapped back after the call below.
  at::Tensor sampling_loc_fp32 = sampling_loc.transpose(4, 5).contiguous();
  at::Tensor attn_weight_fp32 = attn_weight;
  at::Tensor grad_output_fp32 = grad_output;
  if (value.scalar_type() != at::kFloat) {
    value_fp32 = value.to(at::kFloat);
  }
  if (spatial_shapes.scalar_type() != at::kInt) {
    spatial_shapes_int32 = spatial_shapes.to(at::kInt);
  }
  if (level_start_index.scalar_type() != at::kInt) {
    level_start_index_int32 = level_start_index.to(at::kInt);
  }
  if (sampling_loc.scalar_type() != at::kFloat) {
    sampling_loc_fp32 = sampling_loc_fp32.to(at::kFloat);
  }
  if (attn_weight.scalar_type() != at::kFloat) {
    attn_weight_fp32 = attn_weight.to(at::kFloat);
  }
  if (grad_output.scalar_type() != at::kFloat) {
    grad_output_fp32 = grad_output.to(at::kFloat);
  }

  OpCommand cmd;
  cmd.Name("MultiScaleDeformableAttentionGrad")
      .Input(value_fp32)
      .Input(spatial_shapes_int32)
      .Input(level_start_index_int32)
      .Input(sampling_loc_fp32)
      .Input(attn_weight_fp32)
      .Input(grad_output_fp32)
      .Output(grad_value)
      .Output(grad_sampling_loc)
      .Output(grad_attn_weight)
      .Run();

  grad_sampling_loc = grad_sampling_loc.transpose(4, 5).contiguous();
}

REGISTER_NPU_IMPL(ms_deform_attn_impl_backward, ms_deform_attn_backward_npu);
```
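check_support encodes hard kernel limits, so callers must keep num_heads, embed_dims, and num_points inside the supported ranges. Hypothetical tensor shapes that pass the checks (layouts inferred from the dimensions the checks index):

```cpp
void check_support_example() {
  // Assumed layouts:
  //   value:             [bs, num_keys, num_heads, embed_dims]
  //   attention_weights: [bs, num_queries, num_heads, num_levels, num_points]
  at::Tensor value = at::rand({2, 1000, 8, 32});     // num_heads=8, embed_dims=32
  at::Tensor weights = at::rand({2, 900, 8, 4, 4});  // num_points=4
  check_support(value, weights);  // all three TORCH_CHECKs pass
}
```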
mmcv/ops/csrc/pytorch/npu/points_in_polygons_npu.cpp (+1, -1)

```diff
@@ -12,7 +12,7 @@ void points_in_polygons_npu(const Tensor points, Tensor polygons, Tensor output,
               "The batch of polygons tensor must be less than MAX_POLYGONS_BATCH");
   at::Tensor trans_polygons = polygons.transpose(0, 1);
   OpCommand cmd;
-  at::Tensor new_trans_polygons = NpuUtils::format_contiguous(trans_polygons);
+  at::Tensor new_trans_polygons = trans_polygons.contiguous();
   cmd.Name("PointsInPolygons")
       .Input(points, (string) "points")
       .Input(new_trans_polygons, (string) "polygons")
```
mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp (+5, -2)

```diff
@@ -41,8 +41,11 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
     LOG(WARNING) << "The [aligned] attr in roi_align_grad op is false";
     roi_end_mode = 0;
   }
-  c10::SmallVector<int64_t, SIZE> xdiff_shape =
-      array_to_small_vector(grad_input.sizes());
+  auto shape = grad_input.sizes();
+  c10::SmallVector<int64_t, SIZE> xdiff_shape;
+  for (uint64_t i = 0; i < shape.size(); i++) {
+    xdiff_shape.emplace_back(shape[i]);
+  }
   OpCommand cmd;
   cmd.Name("ROIAlignGrad")
       .Input(grad_output)
```
mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp (new file, +69)

```cpp
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void roi_align_rotated_forward_npu(Tensor input, Tensor rois, Tensor output,
                                   int aligned_height, int aligned_width,
                                   float spatial_scale, int sampling_ratio,
                                   bool aligned, bool clockwise) {
  int64_t aligned_height_64 = aligned_height;
  int64_t aligned_width_64 = aligned_width;
  int64_t sampling_ratio_64 = sampling_ratio;
  OpCommand cmd;
  cmd.Name("RoiAlignRotated")
      .Input(input)
      .Input(rois)
      .Output(output)
      .Attr("pooled_h", aligned_height_64)
      .Attr("pooled_w", aligned_width_64)
      .Attr("spatial_scale", spatial_scale)
      .Attr("sampling_ratio", sampling_ratio_64)
      .Attr("aligned", aligned)
      .Attr("clockwise", clockwise)
      .Run();
}

void roi_align_rotated_backward_npu(Tensor top_grad, Tensor rois,
                                    Tensor bottom_grad, int aligned_height,
                                    int aligned_width, float spatial_scale,
                                    int sampling_ratio, bool aligned,
                                    bool clockwise) {
  int64_t aligned_height_64 = aligned_height;
  int64_t aligned_width_64 = aligned_width;
  int64_t sampling_ratio_64 = sampling_ratio;
  c10::SmallVector<int64_t, SIZE> y_grad_shape;
  auto shape = bottom_grad.sizes();
  for (uint64_t i = 0; i < shape.size(); i++) {
    y_grad_shape.emplace_back(shape[i]);
  }
  OpCommand cmd;
  cmd.Name("RoiAlignRotatedGrad")
      .Input(top_grad)
      .Input(rois)
      .Output(bottom_grad)
      .Attr("y_grad_shape", y_grad_shape)
      // Note: unlike the forward pass, the committed source passes
      // aligned_width_64 to "pooled_h" and aligned_height_64 to "pooled_w".
      .Attr("pooled_h", aligned_width_64)
      .Attr("pooled_w", aligned_height_64)
      .Attr("spatial_scale", spatial_scale)
      .Attr("sampling_ratio", sampling_ratio_64)
      .Attr("aligned", aligned)
      .Attr("clockwise", clockwise)
      .Run();
}

void roi_align_rotated_forward_impl(Tensor input, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sampling_ratio,
                                    bool aligned, bool clockwise);

void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sampling_ratio, bool aligned,
                                     bool clockwise);

REGISTER_NPU_IMPL(roi_align_rotated_forward_impl,
                  roi_align_rotated_forward_npu);
REGISTER_NPU_IMPL(roi_align_rotated_backward_impl,
                  roi_align_rotated_backward_npu);
```
mmcv/ops/csrc/pytorch/npu/rotated_feature_align_npu.cpp (new file, +52)

```cpp
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void rotated_feature_align_forward_impl(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output);

void rotated_feature_align_backward_impl(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad);

void rotated_feature_align_forward_npu(const Tensor features,
                                       const Tensor best_bboxes,
                                       const float spatial_scale,
                                       const int points, Tensor output) {
  int64_t points_ = (int64_t)points;
  at::Tensor best_bboxes_ = best_bboxes.transpose(2, 3).transpose(1, 2);
  OpCommand cmd;
  cmd.Name("RotatedFeatureAlign")
      .Input(features)
      .Input(best_bboxes_)
      .Output(output)
      .Attr("spatial_scale", spatial_scale)
      .Attr("points", points_)
      .Run();
}

void rotated_feature_align_backward_npu(const Tensor top_grad,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor bottom_grad) {
  int64_t points_ = (int64_t)points;
  at::Tensor best_bboxes_ = best_bboxes.transpose(2, 3).transpose(1, 2);
  OpCommand cmd;
  cmd.Name("RotatedFeatureAlignGrad")
      .Input(top_grad)
      .Input(best_bboxes_)
      .Output(bottom_grad)
      .Attr("spatial_scale", spatial_scale)
      .Attr("points", points_)
      .Run();
}

REGISTER_NPU_IMPL(rotated_feature_align_forward_impl,
                  rotated_feature_align_forward_npu);
REGISTER_NPU_IMPL(rotated_feature_align_backward_impl,
                  rotated_feature_align_backward_npu);
```
mmcv/ops/csrc/pytorch/npu/stack_ball_query_npu.cpp (new file, +23)

```cpp
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void stack_ball_query_forward_npu(float max_radius, int nsample,
                                  const Tensor new_xyz,
                                  const Tensor new_xyz_batch_cnt,
                                  const Tensor xyz, const Tensor xyz_batch_cnt,
                                  Tensor idx) {
  at::Tensor xyz_transpose = xyz.transpose(0, 1).contiguous();
  double max_radius_double = double(max_radius);
  EXEC_NPU_CMD(aclnnStackBallQuery, xyz_transpose, new_xyz, xyz_batch_cnt,
               new_xyz_batch_cnt, max_radius_double, nsample, idx);
}

void stack_ball_query_forward_impl(float max_radius, int nsample,
                                   const Tensor new_xyz,
                                   const Tensor new_xyz_batch_cnt,
                                   const Tensor xyz,
                                   const Tensor xyz_batch_cnt, Tensor idx);

REGISTER_NPU_IMPL(stack_ball_query_forward_impl, stack_ball_query_forward_npu);
```
mmcv/ops/csrc/pytorch/npu/stack_group_points_npu.cpp (new file, +25)

```cpp
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void stack_group_points_forward_npu(int b, int c, int n, int nsample,
                                    const Tensor features_tensor,
                                    const Tensor features_batch_cnt_tensor,
                                    const Tensor idx_tensor,
                                    const Tensor idx_batch_cnt_tensor,
                                    Tensor out_tensor) {
  EXEC_NPU_CMD(aclnnStackGroupPoints, features_tensor,
               features_batch_cnt_tensor, idx_tensor, idx_batch_cnt_tensor,
               out_tensor);
}

void stack_group_points_forward_impl(int b, int c, int n, int nsample,
                                     const Tensor features_tensor,
                                     const Tensor features_batch_cnt_tensor,
                                     const Tensor idx_tensor,
                                     const Tensor idx_batch_cnt_tensor,
                                     Tensor out_tensor);

REGISTER_NPU_IMPL(stack_group_points_forward_impl,
                  stack_group_points_forward_npu);
```
mmcv/ops/csrc/pytorch/npu/three_interpolate_npu.cpp (new file, +59)

```cpp
#include "pytorch_npu_helper.hpp"
#include "torch_npu/csrc/aten/NPUNativeFunctions.h"
#include "torch_npu/csrc/framework/utils/OpAdapter.h"

using namespace NPU_NAME_SPACE;
using namespace std;

void three_interpolate_forward_npu(int b, int c, int m, int n,
                                   const Tensor points, const Tensor idx,
                                   const Tensor weight, Tensor out) {
  auto originDtype = points.scalar_type();
  TORCH_CHECK((originDtype == at::kFloat || originDtype == at::kHalf),
              "three_interpolate_forward ascend only support fp32 and fp16.");
  auto point_c_trans = points.transpose(1, 2);

  OpCommand cmd;
  cmd.Name("ThreeInterpolate")
      .Input(point_c_trans)
      .Input(idx)
      .Input(weight)
      .Output(out)
      .Run();

  auto output = out.view({b, n, c}).transpose(1, 2);
  auto res = output.contiguous();
  out.copy_(res);
}

void three_interpolate_backward_npu(int b, int c, int n, int m,
                                    const Tensor grad_out, const Tensor idx,
                                    const Tensor weight, Tensor grad_points) {
  auto originDtype = grad_out.scalar_type();
  TORCH_CHECK((originDtype == at::kFloat || originDtype == at::kHalf),
              "three_interpolate_backward ascend only support fp32 and fp16.");
  auto grad_x = at::unsqueeze(grad_out, 3);
  auto grad_y = at::unsqueeze(grad_points, 3);

  EXEC_NPU_CMD(aclnnThreeInterpolateBackward, grad_x, idx, weight, m, grad_y);

  auto output = at::squeeze(grad_y, 3);
  auto res = output.contiguous();
  grad_points.copy_(res);
}

void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out);

void three_interpolate_backward_impl(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points);

REGISTER_NPU_IMPL(three_interpolate_forward_impl,
                  three_interpolate_forward_npu);
REGISTER_NPU_IMPL(three_interpolate_backward_impl,
                  three_interpolate_backward_npu);
```
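In the backward pass, the 3-D gradients are wrapped to 4-D before the aclnn call and unwrapped afterwards; presumably the kernel expects a trailing singleton dimension. An illustrative shape trace (sizes hypothetical):

```cpp
void three_interpolate_shape_trace() {
  at::Tensor grad_out = at::rand({2, 16, 64});        // [b, c, n]
  at::Tensor grad_points = at::zeros({2, 16, 32});    // [b, c, m]
  at::Tensor grad_x = at::unsqueeze(grad_out, 3);     // [b, c, n, 1]
  at::Tensor grad_y = at::unsqueeze(grad_points, 3);  // [b, c, m, 1]
  // ... aclnnThreeInterpolateBackward consumes grad_x and writes grad_y ...
  at::Tensor res = at::squeeze(grad_y, 3).contiguous();  // back to [b, c, m]
  grad_points.copy_(res);
}
```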
mmcv/ops/csrc/pytorch/roi_align.cpp (+30, -9)

```diff
@@ -5,10 +5,14 @@
 #include <diopi/diopirt.h>
 #include <diopi/functions.h>
 #include <diopi/functions_mmcv.h>
+#include <torch/csrc/utils/pybind.h>
 #include "csrc_dipu/base/basedef.h"
 #include "csrc_dipu/diopirt/diopirt_impl.h"
 #include "csrc_dipu/runtime/device/deviceapis.h"
+#include "csrc_dipu/utils/helpfunc.hpp"
+using dipu::VENDOR_TYPE;
 using dipu::diopi_helper::toDiopiScalar;
 using dipu::diopi_helper::toDiopiTensorHandle;
 #endif
@@ -56,10 +60,18 @@ void roi_align_forward_diopi(Tensor input, Tensor rois, Tensor output,
   auto argmax_x_p = toDiopiTensorHandle(argmax_x);
   bool is_mock_cuda = input.device().type() == dipu::DIPU_DEVICE_TYPE;
   if (is_mock_cuda && reinterpret_cast<void *>(diopiRoiAlignMmcv) != nullptr) {
-    auto ret = diopiRoiAlignMmcv(ch, out_p, argmax_y_p, argmax_x_p, input_p,
-                                 rois_p, aligned_height, aligned_width,
-                                 sampling_ratio, pool_mode, spatial_scale,
-                                 aligned);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiRoiAlignMmcv(ch, out_p, argmax_y_p, argmax_x_p, input_p,
+                                   rois_p, aligned_height, aligned_width,
+                                   sampling_ratio, pool_mode, spatial_scale,
+                                   aligned);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiRoiAlignMmcv(ch, out_p, argmax_y_p, argmax_x_p, input_p,
+                                   rois_p, aligned_height, aligned_width,
+                                   sampling_ratio, pool_mode, spatial_scale,
+                                   aligned);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op roi_align_forward";
   auto input_cpu = input.cpu();
@@ -96,11 +108,20 @@ void roi_align_backward_diopi(Tensor grad_output, Tensor rois, Tensor argmax_y,
   bool is_mock_cuda = grad_output.device().type() == dipu::DIPU_DEVICE_TYPE;
   if (is_mock_cuda &&
       reinterpret_cast<void *>(diopiRoiAlignBackwardMmcv) != nullptr) {
-    auto ret = diopiRoiAlignBackwardMmcv(
-        ch, grad_input_, grad_output_, rois_, argmax_y_, argmax_x_,
-        aligned_height, aligned_width, sampling_ratio, pool_mode,
-        spatial_scale, aligned);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiRoiAlignBackwardMmcv(
+          ch, grad_input_, grad_output_, rois_, argmax_y_, argmax_x_,
+          aligned_height, aligned_width, sampling_ratio, pool_mode,
+          spatial_scale, aligned);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiRoiAlignBackwardMmcv(
+          ch, grad_input_, grad_output_, rois_, argmax_y_, argmax_x_,
+          aligned_height, aligned_width, sampling_ratio, pool_mode,
+          spatial_scale, aligned);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op roi_align_backward";
   auto grad_output_cpu = grad_output.cpu();
```
mmcv/ops/csrc/pytorch/voxelization.cpp (+28, -8)

```diff
@@ -5,9 +5,13 @@
 #include <diopi/diopirt.h>
 #include <diopi/functions.h>
 #include <diopi/functions_mmcv.h>
+#include <torch/csrc/utils/pybind.h>
 #include "csrc_dipu/diopirt/diopirt_impl.h"
 #include "csrc_dipu/runtime/device/deviceapis.h"
+#include "csrc_dipu/utils/helpfunc.hpp"
+using dipu::VENDOR_TYPE;
 using dipu::diopi_helper::toDiopiScalar;
 using dipu::diopi_helper::toDiopiTensorHandle;
 #endif
@@ -84,11 +88,20 @@ void hard_voxelize_forward_diopi(const at::Tensor &points,
   auto num_points_per_voxel_p = toDiopiTensorHandle(num_points_per_voxel);
   auto voxel_num_p = toDiopiTensorHandle(voxel_num);
   if (reinterpret_cast<void *>(diopiHardVoxelizeMmcv) != nullptr) {
-    auto ret = diopiHardVoxelizeMmcv(
-        ch, voxels_p, coors_p, num_points_per_voxel_p, voxel_num_p, points_p,
-        voxel_size_p, coors_range_p, max_points, max_voxels, NDim,
-        deterministic);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiHardVoxelizeMmcv(
+          ch, voxels_p, coors_p, num_points_per_voxel_p, voxel_num_p, points_p,
+          voxel_size_p, coors_range_p, max_points, max_voxels, NDim,
+          deterministic);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiHardVoxelizeMmcv(
+          ch, voxels_p, coors_p, num_points_per_voxel_p, voxel_num_p, points_p,
+          voxel_size_p, coors_range_p, max_points, max_voxels, NDim,
+          deterministic);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op hard_voxelize_forward";
   auto points_cpu = points.cpu();
@@ -146,9 +159,16 @@ void dynamic_voxelize_forward_diopi(const at::Tensor &points,
   auto coors_range_p = toDiopiTensorHandle(coors_range);
   auto coors_p = toDiopiTensorHandle(coors);
   if (reinterpret_cast<void *>(diopiDynamicVoxelizeMmcv) != nullptr) {
-    auto ret = diopiDynamicVoxelizeMmcv(ch, coors_p, points_p, voxel_size_p,
-                                        coors_range_p, NDim);
-    if (ret == diopiSuccess) return;
+    if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
+      pybind11::gil_scoped_release no_gil;
+      auto ret = diopiDynamicVoxelizeMmcv(ch, coors_p, points_p, voxel_size_p,
+                                          coors_range_p, NDim);
+      if (ret == diopiSuccess) return;
+    } else {
+      auto ret = diopiDynamicVoxelizeMmcv(ch, coors_p, points_p, voxel_size_p,
+                                          coors_range_p, NDim);
+      if (ret == diopiSuccess) return;
+    }
   }
   LOG(WARNING) << "Fallback to cpu: mmcv ext op dynamic_voxelize_forward";
   auto points_cpu = points.cpu();
```
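All five DIOPI wrappers touched by this commit share the same dispatch skeleton. A distilled sketch of that common shape (the op name is illustrative; argument lists are elided):

```cpp
// 1. Convert ATen tensors to DIOPI handles.
// 2. If the vendor provides the op, call it (releasing the GIL on NPU).
// 3. On a missing symbol or failed call, log and fall back to the CPU path.
if (reinterpret_cast<void *>(diopiSomeOpMmcv) != nullptr) {  // hypothetical op
  if (strcmp(dipu::VendorTypeToStr(VENDOR_TYPE), "NPU") == 0) {
    pybind11::gil_scoped_release no_gil;
    if (diopiSomeOpMmcv(/* handles and attrs */) == diopiSuccess) return;
  } else {
    if (diopiSomeOpMmcv(/* handles and attrs */) == diopiSuccess) return;
  }
}
LOG(WARNING) << "Fallback to cpu: ...";
// copy inputs to CPU, run the reference kernel, copy results back
```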