Unverified commit ad7284e8, authored by Chris Jiang and committed by GitHub

[Enhancement] Add torch mluops check before calling mluOpsxxx interface (#2871)

parent 86a38aa3
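
Every mlu-ops call returns an mluOpStatus_t, which the previous code discarded. This commit wraps each mluOpXxx call in a TORCH_MLUOP_CHECK macro (added to the common MLU helper header, see the corresponding hunk below) so that a failing call raises a PyTorch error instead of failing silently. A minimal sketch of the pattern applied throughout the diff; mluOpFoo is a placeholder name, not a real mlu-ops function:

// Before: the status returned by the mlu-ops call is ignored.
mluOpFoo(handle, input_desc.desc(), input_ptr);

// After: the status is checked; any value other than MLUOP_STATUS_SUCCESS
// aborts via TORCH_CHECK with the message from mluOpGetErrorString(status).
TORCH_MLUOP_CHECK(mluOpFoo(handle, input_desc.desc(), input_ptr));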
@@ -34,9 +34,9 @@ void ball_query_forward_mlu(int b, int n, int m, float min_radius,
   auto idx_ptr = idx_impl->cnnlMalloc();
   auto handle = mluOpGetCurrentHandle();
-  mluOpBallQuery(handle, new_xyz_desc.desc(), new_xyz_ptr, xyz_desc.desc(),
-                 xyz_ptr, min_radius, max_radius, nsample, idx_desc.desc(),
-                 idx_ptr);
+  TORCH_MLUOP_CHECK(mluOpBallQuery(handle, new_xyz_desc.desc(), new_xyz_ptr, xyz_desc.desc(),
+                                   xyz_ptr, min_radius, max_radius, nsample, idx_desc.desc(),
+                                   idx_ptr));
 }

 void ball_query_forward_impl(int b, int n, int m, float min_radius,
...
@@ -38,9 +38,9 @@ void BoxIouRotatedMLUKernelLauncher(const Tensor boxes1, const Tensor boxes2,
   auto ious_ptr = ious_impl->cnnlMalloc();
   CNLOG(INFO) << "Call mluOpBoxIouRotated().";
-  mluOpBoxIouRotated(handle, mode_flag, aligned, boxes1_desc.desc(), boxes1_ptr,
-                     boxes2_desc.desc(), boxes2_ptr, ious_desc.desc(),
-                     ious_ptr);
+  TORCH_MLUOP_CHECK(mluOpBoxIouRotated(handle, mode_flag, aligned, boxes1_desc.desc(), boxes1_ptr,
+                                       boxes2_desc.desc(), boxes2_ptr, ious_desc.desc(),
+                                       ious_ptr));
 }

 void box_iou_rotated_mlu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
...
@@ -71,15 +71,15 @@ void CARAFEForwardMLUKernelLauncher(const Tensor input, const Tensor mask,
   // set op descriptor
   auto handle = mluOpGetCurrentHandle();
   mluOpCarafeDescriptor_t carafe_desc;
-  mluOpCreateCarafeDescriptor(&carafe_desc);
-  mluOpSetCarafeDescriptor(carafe_desc, input.dim(), kernel_size, group_size,
-                           scale_factor);
+  TORCH_MLUOP_CHECK(mluOpCreateCarafeDescriptor(&carafe_desc));
+  TORCH_MLUOP_CHECK(mluOpSetCarafeDescriptor(carafe_desc, input.dim(), kernel_size, group_size,
+                                             scale_factor));
   // launch kernel
-  mluOpCarafeForward(handle, carafe_desc, input_desc.desc(), input_ptr,
-                     mask_desc.desc(), mask_ptr, output_desc.desc(),
-                     output_ptr);
+  TORCH_MLUOP_CHECK(mluOpCarafeForward(handle, carafe_desc, input_desc.desc(), input_ptr,
+                                       mask_desc.desc(), mask_ptr, output_desc.desc(),
+                                       output_ptr));
   // destroy op descriptor
-  mluOpDestroyCarafeDescriptor(carafe_desc);
+  TORCH_MLUOP_CHECK(mluOpDestroyCarafeDescriptor(carafe_desc));
   // copy output from NHWC back into NCHW
   rinput.copy_(rinput_);
@@ -159,16 +159,16 @@ void CARAFEBackwardMLUKernelLauncher(
   // set op descriptor
   auto handle = mluOpGetCurrentHandle();
   mluOpCarafeDescriptor_t carafe_desc;
-  mluOpCreateCarafeDescriptor(&carafe_desc);
-  mluOpSetCarafeDescriptor(carafe_desc, grad_output.dim(), kernel_size,
-                           group_size, scale_factor);
+  TORCH_MLUOP_CHECK(mluOpCreateCarafeDescriptor(&carafe_desc));
+  TORCH_MLUOP_CHECK(mluOpSetCarafeDescriptor(carafe_desc, grad_output.dim(), kernel_size,
+                                             group_size, scale_factor));
   // launch kernel
-  mluOpCarafeBackward(handle, carafe_desc, input_desc.desc(), input_ptr,
-                      mask_desc.desc(), mask_ptr, grad_output_desc.desc(),
-                      grad_output_ptr, grad_input_desc.desc(), grad_input_ptr,
-                      grad_mask_desc.desc(), grad_mask_ptr);
+  TORCH_MLUOP_CHECK(mluOpCarafeBackward(handle, carafe_desc, input_desc.desc(), input_ptr,
+                                        mask_desc.desc(), mask_ptr, grad_output_desc.desc(),
+                                        grad_output_ptr, grad_input_desc.desc(), grad_input_ptr,
+                                        grad_mask_desc.desc(), grad_mask_ptr));
   // destroy op descriptor
-  mluOpDestroyCarafeDescriptor(carafe_desc);
+  TORCH_MLUOP_CHECK(mluOpDestroyCarafeDescriptor(carafe_desc));
   // copy output from NHWC back into NCHW
   grad_input.copy_(rgrad_input_);
...
@@ -50,10 +50,10 @@ void DeformRoIPoolForwardMLUKernelLauncher(Tensor input, Tensor rois,
   // get compute handle
   auto handle = mluOpGetCurrentHandle();
-  mluOpDeformRoiPoolForward(
-      handle, input_desc.desc(), input_ptr, rois_desc.desc(), rois_ptr,
-      offset_real_desc, offset_ptr, pooled_height, pooled_width, spatial_scale,
-      sampling_ratio, gamma, output_desc.desc(), output_ptr);
+  TORCH_MLUOP_CHECK(mluOpDeformRoiPoolForward(
+      handle, input_desc.desc(), input_ptr, rois_desc.desc(), rois_ptr,
+      offset_real_desc, offset_ptr, pooled_height, pooled_width, spatial_scale,
+      sampling_ratio, gamma, output_desc.desc(), output_ptr));
   output.copy_(output_contiguous);
 }
@@ -113,12 +113,12 @@ void DeformRoIPoolBackwardMLUKernelLauncher(
   // get compute handle
   auto handle = mluOpGetCurrentHandle();
-  mluOpDeformRoiPoolBackward(
-      handle, grad_output_desc.desc(), grad_output_ptr, input_desc.desc(),
-      input_ptr, rois_desc.desc(), rois_ptr, offset_real_desc, offset_ptr,
-      pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma,
-      grad_input_desc.desc(), grad_input_ptr, grad_offset_real_desc,
-      grad_offset_ptr);
+  TORCH_MLUOP_CHECK(mluOpDeformRoiPoolBackward(
+      handle, grad_output_desc.desc(), grad_output_ptr, input_desc.desc(),
+      input_ptr, rois_desc.desc(), rois_ptr, offset_real_desc, offset_ptr,
+      pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma,
+      grad_input_desc.desc(), grad_input_ptr, grad_offset_real_desc,
+      grad_offset_ptr));
   grad_input.copy_(grad_input_);
 }
...
@@ -42,9 +42,9 @@ Tensor diff_iou_rotated_sort_vertices_forward_mlu(Tensor vertices, Tensor mask,
   auto handle = mluOpGetCurrentHandle();
   // launch kernel
-  mluOpDiffIouRotatedSortVerticesForward(
-      handle, vertices_desc.desc(), vertices_ptr, mask_desc.desc(), mask_ptr,
-      num_valid_desc.desc(), num_valid_ptr, idx_desc.desc(), idx_ptr);
+  TORCH_MLUOP_CHECK(mluOpDiffIouRotatedSortVerticesForward(
+      handle, vertices_desc.desc(), vertices_ptr, mask_desc.desc(), mask_ptr,
+      num_valid_desc.desc(), num_valid_ptr, idx_desc.desc(), idx_ptr));
   return idx;
 }
...
@@ -30,7 +30,7 @@ void IoU3DNMS3DMLUKernelLauncher(Tensor boxes, Tensor &keep, Tensor &keep_num,
   // workspace
   size_t workspace_size = 0;
   auto handle = mluOpGetCurrentHandle();
-  mluOpGetNmsWorkspaceSize(handle, boxes_desc.desc(), NULL, &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetNmsWorkspaceSize(handle, boxes_desc.desc(), NULL, &workspace_size));
   auto workspace = at::empty(workspace_size, boxes.options().dtype(at::kByte));

   // get compute queue
@@ -56,16 +56,16 @@ void IoU3DNMS3DMLUKernelLauncher(Tensor boxes, Tensor &keep, Tensor &keep_num,
   const int max_output_size = input_box_num;
   const float offset = 0.0;
-  mluOpCreateNmsDescriptor(&nms_desc);
-  mluOpSetNmsDescriptor(nms_desc, box_mode, output_mode, algo, method_mode,
-                        iou_threshold, soft_nms_sigma, max_output_size,
-                        confidence_threshold, offset, input_layout,
-                        pad_to_max_output_size);
+  TORCH_MLUOP_CHECK(mluOpCreateNmsDescriptor(&nms_desc));
+  TORCH_MLUOP_CHECK(mluOpSetNmsDescriptor(nms_desc, box_mode, output_mode, algo, method_mode,
+                                          iou_threshold, soft_nms_sigma, max_output_size,
+                                          confidence_threshold, offset, input_layout,
+                                          pad_to_max_output_size));
-  mluOpNms(handle, nms_desc, boxes_desc.desc(), boxes_ptr, NULL, NULL,
-           workspace_ptr, workspace_size, output_desc.desc(), output_ptr,
-           output_size_ptr);
-  mluOpDestroyNmsDescriptor(nms_desc);
+  TORCH_MLUOP_CHECK(mluOpNms(handle, nms_desc, boxes_desc.desc(), boxes_ptr, NULL, NULL,
+                             workspace_ptr, workspace_size, output_desc.desc(), output_ptr,
+                             output_size_ptr));
+  TORCH_MLUOP_CHECK(mluOpDestroyNmsDescriptor(nms_desc));
 }

 void iou3d_nms3d_forward_mlu(const Tensor boxes, Tensor &keep, Tensor &keep_num,
...
@@ -123,7 +123,7 @@ void MluOpTensorDescriptor::set_desc(const at::Tensor& t,
                                      mluOpDataType_t dtype,
                                      std::vector<int>& dims) {
   int dimNb = dims.size();
-  mluOpSetTensorDescriptor(desc_, layout, dtype, dimNb, dims.data());
+  TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(desc_, layout, dtype, dimNb, dims.data()));
 }

 // Handles
...
@@ -34,6 +34,17 @@
       auto NAME##_impl = torch_mlu::getMluTensorImpl(NAME##_contigous); \
       auto NAME##_ptr = NAME##_impl->cnnlMalloc();

+#ifndef TORCH_MLUOP_CHECK
+#define TORCH_MLUOP_CHECK(EXPR)                                            \
+  do {                                                                     \
+    mluOpStatus_t status = EXPR;                                           \
+    if (status != MLUOP_STATUS_SUCCESS) {                                  \
+      CNLOG(ERROR) << "";                                                  \
+      TORCH_CHECK(false, "MLUOPS error: ", mluOpGetErrorString(status));   \
+    }                                                                      \
+  } while (0);
+#endif
+
 enum class reduce_t { SUM = 0, MEAN = 1, MAX = 2 };

 inline std::string to_string(reduce_t reduce_type) {
@@ -54,8 +65,8 @@ mluOpReduceMode_t getMluOpReduceMode(const reduce_t reduce_type);

 class MluOpTensorDescriptor {
  public:
-  MluOpTensorDescriptor() { mluOpCreateTensorDescriptor(&desc_); };
-  ~MluOpTensorDescriptor() { mluOpDestroyTensorDescriptor(desc_); }
+  MluOpTensorDescriptor() { TORCH_MLUOP_CHECK(mluOpCreateTensorDescriptor(&desc_)); };
+  ~MluOpTensorDescriptor() { TORCH_MLUOP_CHECK(mluOpDestroyTensorDescriptor(desc_)); }

   void set(at::Tensor);
   void set_with_layout(at::Tensor, mluOpTensorLayout_t layout);
@@ -71,14 +82,14 @@ mluOpHandle_t mluOpGetCurrentHandle(c10::DeviceIndex device_index = -1);

 class MluOpHandle {
  public:
-  MluOpHandle() : handle(nullptr) { mluOpCreate(&handle); }
+  MluOpHandle() : handle(nullptr) { TORCH_MLUOP_CHECK(mluOpCreate(&handle)); }
   ~MluOpHandle() {
     if (handle) {
-      mluOpDestroy(handle);
+      TORCH_MLUOP_CHECK(mluOpDestroy(handle));
       handle = nullptr;
     }
   }
-  void setQueue(cnrtQueue_t queue) { mluOpSetQueue(handle, queue); }
+  void setQueue(cnrtQueue_t queue) { TORCH_MLUOP_CHECK(mluOpSetQueue(handle, queue)); }
   mluOpHandle_t handle;
 };
...
@@ -35,12 +35,12 @@ Tensor MsDeformAttnForwardLauncher(const Tensor& value,
   INITIAL_MLU_PARAM_WITH_TENSOR(sampling_loc);
   INITIAL_MLU_PARAM_WITH_TENSOR(attn_weight);
-  mluOpMsDeformAttnForward(
-      handle, value_desc.desc(), value_ptr, spatial_shapes_int_desc.desc(),
-      spatial_shapes_int_ptr, level_start_index_int_desc.desc(),
-      level_start_index_int_ptr, sampling_loc_desc.desc(), sampling_loc_ptr,
-      attn_weight_desc.desc(), attn_weight_ptr, im2col_step, output_desc.desc(),
-      output_ptr);
+  TORCH_MLUOP_CHECK(mluOpMsDeformAttnForward(
+      handle, value_desc.desc(), value_ptr, spatial_shapes_int_desc.desc(),
+      spatial_shapes_int_ptr, level_start_index_int_desc.desc(),
+      level_start_index_int_ptr, sampling_loc_desc.desc(), sampling_loc_ptr,
+      attn_weight_desc.desc(), attn_weight_ptr, im2col_step, output_desc.desc(),
+      output_ptr));
   output = output.view({batch_size, num_queries, num_heads * channels});
   return output;
...
@@ -34,8 +34,8 @@ Tensor NMSMLUKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
   // workspace
   size_t workspace_size = 0;
   auto handle = mluOpGetCurrentHandle();
-  mluOpGetNmsWorkspaceSize(handle, boxes_desc.desc(), scores_desc.desc(),
-                           &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetNmsWorkspaceSize(handle, boxes_desc.desc(), scores_desc.desc(),
+                                             &workspace_size));
   auto workspace = at::empty(workspace_size, boxes.options().dtype(at::kByte));

   // get compute queue
@@ -62,16 +62,16 @@ Tensor NMSMLUKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
   const bool pad_to_max_output_size = false;
   const int max_output_size = max_output_boxes;
-  mluOpCreateNmsDescriptor(&nms_desc);
-  mluOpSetNmsDescriptor(nms_desc, box_mode, output_mode, algo, method_mode,
-                        iou_threshold, soft_nms_sigma, max_output_size,
-                        confidence_threshold, (float)offset, input_layout,
-                        pad_to_max_output_size);
+  TORCH_MLUOP_CHECK(mluOpCreateNmsDescriptor(&nms_desc));
+  TORCH_MLUOP_CHECK(mluOpSetNmsDescriptor(nms_desc, box_mode, output_mode, algo, method_mode,
+                                          iou_threshold, soft_nms_sigma, max_output_size,
+                                          confidence_threshold, (float)offset, input_layout,
+                                          pad_to_max_output_size));
-  mluOpNms(handle, nms_desc, boxes_desc.desc(), boxes_ptr, scores_desc.desc(),
-           scores_ptr, workspace_ptr, workspace_size, output_desc.desc(),
-           output_ptr, output_size_ptr);
-  mluOpDestroyNmsDescriptor(nms_desc);
+  TORCH_MLUOP_CHECK(mluOpNms(handle, nms_desc, boxes_desc.desc(), boxes_ptr, scores_desc.desc(),
+                             scores_ptr, workspace_ptr, workspace_size, output_desc.desc(),
+                             output_ptr, output_size_ptr));
+  TORCH_MLUOP_CHECK(mluOpDestroyNmsDescriptor(nms_desc));
   int output_num = *static_cast<int *>(output_size.cpu().data_ptr());
   auto ret = output.to(boxes.options().dtype(at::kLong));
   return ret.slice(0, 0, output_num);
...
@@ -30,7 +30,7 @@ Tensor nms_rotated_mlu(Tensor boxes, Tensor scores, float iou_threshold) {
   // workspace
   size_t workspace_size = 0;
   auto handle = mluOpGetCurrentHandle();
-  mluOpGetNmsRotatedWorkspaceSize(handle, boxes_desc.desc(), &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetNmsRotatedWorkspaceSize(handle, boxes_desc.desc(), &workspace_size));
   auto workspace = at::empty(workspace_size, boxes.options().dtype(at::kByte));

   auto boxes_impl = torch_mlu::getMluTensorImpl(boxes_);
@@ -44,9 +44,9 @@ Tensor nms_rotated_mlu(Tensor boxes, Tensor scores, float iou_threshold) {
   auto output_size_impl = torch_mlu::getMluTensorImpl(output_size);
   auto output_size_ptr = output_size_impl->cnnlMalloc();
-  mluOpNmsRotated(handle, iou_threshold, boxes_desc.desc(), boxes_ptr,
-                  scores_desc.desc(), scores_ptr, workspace_ptr, workspace_size,
-                  output_desc.desc(), output_ptr, (int *)output_size_ptr);
+  TORCH_MLUOP_CHECK(mluOpNmsRotated(handle, iou_threshold, boxes_desc.desc(), boxes_ptr,
+                                    scores_desc.desc(), scores_ptr, workspace_ptr, workspace_size,
+                                    output_desc.desc(), output_ptr, (int *)output_size_ptr));
   int output_num = *static_cast<int *>(output_size.cpu().data_ptr());
   auto ret = output.to(boxes.options().dtype(at::kLong));
   return ret.slice(0, 0, output_num);
...
@@ -35,8 +35,8 @@ void PSAMaskForwardMLUKernelLauncher(const int psa_type, const Tensor x,
   auto y_impl = torch_mlu::getMluTensorImpl(y_tmp);
   auto y_ptr = y_impl->cnnlMalloc();
-  mluOpPsamaskForward(handle, psa_type, x_desc.desc(), x_ptr, h_mask, w_mask,
-                      y_desc.desc(), y_ptr);
+  TORCH_MLUOP_CHECK(mluOpPsamaskForward(handle, psa_type, x_desc.desc(), x_ptr, h_mask, w_mask,
+                                        y_desc.desc(), y_ptr));
   y.copy_(y_tmp);
 }
@@ -67,8 +67,8 @@ void PSAMaskBackwardMLUKernelLauncher(const int psa_type, const Tensor dy,
   auto dy_impl = torch_mlu::getMluTensorImpl(dy_tensor);
   auto dy_ptr = dy_impl->cnnlMalloc();
-  mluOpPsamaskBackward(handle, psa_type, dy_desc.desc(), dy_ptr, h_mask, w_mask,
-                       dx_tmp_desc.desc(), dx_ptr);
+  TORCH_MLUOP_CHECK(mluOpPsamaskBackward(handle, psa_type, dy_desc.desc(), dy_ptr, h_mask, w_mask,
+                                         dx_tmp_desc.desc(), dx_ptr));
   dx.copy_(dx_tmp);
 }
...
@@ -48,10 +48,10 @@ void ROIAlignForwardMLUKernelLauncher(Tensor input, Tensor rois, Tensor output,
   auto output_ptr = output_impl->cnnlMalloc();

   mluOpRoiAlignForwardDescriptor_t roialign_desc;
-  mluOpCreateRoiAlignForwardDescriptor(&roialign_desc);
-  mluOpSetRoiAlignForwardDescriptor_v2(roialign_desc, aligned_height,
-                                       aligned_width, sampling_ratio,
-                                       spatial_scale, pool_mode, aligned);
+  TORCH_MLUOP_CHECK(mluOpCreateRoiAlignForwardDescriptor(&roialign_desc));
+  TORCH_MLUOP_CHECK(mluOpSetRoiAlignForwardDescriptor_v2(roialign_desc, aligned_height,
+                                                         aligned_width, sampling_ratio,
+                                                         spatial_scale, pool_mode, aligned));

   auto handle = mluOpGetCurrentHandle();
   if (pool_mode == 0) {
@@ -65,18 +65,18 @@ void ROIAlignForwardMLUKernelLauncher(Tensor input, Tensor rois, Tensor output,
     auto argmax_y_ptr = argmax_y_impl->cnnlMalloc();
     argmax_y_desc.set_with_layout(argmax_x_contiguous, MLUOP_LAYOUT_NHWC);
     argmax_x_desc.set_with_layout(argmax_x_contiguous, MLUOP_LAYOUT_NHWC);
-    mluOpRoiAlignForward_v2(handle, roialign_desc, input_desc.desc(), self_ptr,
-                            rois_desc.desc(), rois_ptr, output_desc.desc(),
-                            output_ptr, argmax_x_desc.desc(), argmax_x_ptr,
-                            argmax_y_desc.desc(), argmax_y_ptr);
+    TORCH_MLUOP_CHECK(mluOpRoiAlignForward_v2(handle, roialign_desc, input_desc.desc(), self_ptr,
+                                              rois_desc.desc(), rois_ptr, output_desc.desc(),
+                                              output_ptr, argmax_x_desc.desc(), argmax_x_ptr,
+                                              argmax_y_desc.desc(), argmax_y_ptr));
     argmax_x.copy_(argmax_x_contiguous);
     argmax_y.copy_(argmax_y_contiguous);
   } else {
-    mluOpRoiAlignForward_v2(handle, roialign_desc, input_desc.desc(), self_ptr,
-                            rois_desc.desc(), rois_ptr, output_desc.desc(),
-                            output_ptr, NULL, NULL, NULL, NULL);
+    TORCH_MLUOP_CHECK(mluOpRoiAlignForward_v2(handle, roialign_desc, input_desc.desc(), self_ptr,
+                                              rois_desc.desc(), rois_ptr, output_desc.desc(),
+                                              output_ptr, NULL, NULL, NULL, NULL));
   }
-  mluOpDestroyRoiAlignForwardDescriptor(roialign_desc);
+  TORCH_MLUOP_CHECK(mluOpDestroyRoiAlignForwardDescriptor(roialign_desc));
   output.copy_(output_contiguous);
 }
@@ -136,16 +136,16 @@ void ROIAlignBackwardMLUKernelLauncher(Tensor grad, Tensor rois,
     auto argmax_y_ptr = argmax_y_impl->cnnlMalloc();
     argmax_y_desc.set_with_layout(argmax_x_contiguous, MLUOP_LAYOUT_NHWC);
     argmax_x_desc.set_with_layout(argmax_x_contiguous, MLUOP_LAYOUT_NHWC);
-    mluOpRoiAlignBackward_v2(handle, grads_desc.desc(), grad_ptr,
-                             rois_desc.desc(), rois_ptr, argmax_y_desc.desc(),
-                             argmax_x_ptr, argmax_y_desc.desc(), argmax_y_ptr,
-                             spatial_scale, sampling_ratio, aligned, pool_mode,
-                             grad_input_desc.desc(), grad_input_ptr);
+    TORCH_MLUOP_CHECK(mluOpRoiAlignBackward_v2(handle, grads_desc.desc(), grad_ptr,
+                                               rois_desc.desc(), rois_ptr, argmax_y_desc.desc(),
+                                               argmax_x_ptr, argmax_y_desc.desc(), argmax_y_ptr,
+                                               spatial_scale, sampling_ratio, aligned, pool_mode,
+                                               grad_input_desc.desc(), grad_input_ptr));
   } else {
-    mluOpRoiAlignBackward_v2(handle, grads_desc.desc(), grad_ptr,
-                             rois_desc.desc(), rois_ptr, NULL, NULL, NULL, NULL,
-                             spatial_scale, sampling_ratio, aligned, pool_mode,
-                             grad_input_desc.desc(), grad_input_ptr);
+    TORCH_MLUOP_CHECK(mluOpRoiAlignBackward_v2(handle, grads_desc.desc(), grad_ptr,
+                                               rois_desc.desc(), rois_ptr, NULL, NULL, NULL, NULL,
+                                               spatial_scale, sampling_ratio, aligned, pool_mode,
+                                               grad_input_desc.desc(), grad_input_ptr));
   }
   grad_input.copy_(grad_input_);
 }
...
@@ -40,10 +40,10 @@ void ROIAlignRotatedForwardMLUKernelLauncher(Tensor input, Tensor rois,
   // get compute handle
   auto handle = mluOpGetCurrentHandle();
-  mluOpRoiAlignRotatedForward(
-      handle, input_desc.desc(), input_ptr, rois_desc.desc(), rois_ptr,
-      pooled_height, pooled_width, sampling_ratio, spatial_scale, aligned,
-      clockwise, output_desc.desc(), output_ptr);
+  TORCH_MLUOP_CHECK(mluOpRoiAlignRotatedForward(
+      handle, input_desc.desc(), input_ptr, rois_desc.desc(), rois_ptr,
+      pooled_height, pooled_width, sampling_ratio, spatial_scale, aligned,
+      clockwise, output_desc.desc(), output_ptr));
   output.copy_(output_contiguous);
 }
@@ -76,10 +76,10 @@ void ROIAlignRotatedBackwardMLUKernelLauncher(
   // get compute handle
   auto handle = mluOpGetCurrentHandle();
-  mluOpRoiAlignRotatedBackward(
-      handle, top_grad_desc.desc(), top_grad_ptr, rois_desc.desc(), rois_ptr,
-      pooled_height, pooled_width, sampling_ratio, spatial_scale, aligned,
-      clockwise, bottom_grad_desc.desc(), bottom_grad_ptr);
+  TORCH_MLUOP_CHECK(mluOpRoiAlignRotatedBackward(
+      handle, top_grad_desc.desc(), top_grad_ptr, rois_desc.desc(), rois_ptr,
+      pooled_height, pooled_width, sampling_ratio, spatial_scale, aligned,
+      clockwise, bottom_grad_desc.desc(), bottom_grad_ptr));
   bottom_grad.copy_(bottom_grad_);
 }
...
@@ -44,9 +44,9 @@ void RoiawarePool3dForwardMLUKernelLauncher(
   // allocate extra space for workspace
   size_t workspace_size = 0;
-  mluOpGetRoiawarePool3dForwardWorkspaceSize(
-      handle, rois_desc.desc(), pts_desc.desc(), pts_feature_desc.desc(),
-      &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetRoiawarePool3dForwardWorkspaceSize(
+      handle, rois_desc.desc(), pts_desc.desc(), pts_feature_desc.desc(),
+      &workspace_size));

   auto workspace = at::empty(workspace_size, rois.options().dtype(at::kByte));
   auto workspace_impl = torch_mlu::getMluTensorImpl(workspace);
@@ -69,13 +69,13 @@ void RoiawarePool3dForwardMLUKernelLauncher(
   auto pooled_features_ptr = pooled_features_impl->cnnlMalloc();

   CNLOG(INFO) << "Call mluOpRoiawarePool3dForward().";
-  mluOpRoiawarePool3dForward(
-      handle, pool_method, boxes_num, pts_num, channels, rois_desc.desc(),
-      rois_ptr, pts_desc.desc(), pts_ptr, pts_feature_desc.desc(),
-      pts_feature_ptr, workspace_ptr, workspace_size, max_pts_each_voxel, out_x,
-      out_y, out_z, argmax_desc.desc(), argmax_ptr,
-      pts_idx_of_voxels_desc.desc(), pts_idx_of_voxels_ptr,
-      pooled_features_desc.desc(), pooled_features_ptr);
+  TORCH_MLUOP_CHECK(mluOpRoiawarePool3dForward(
+      handle, pool_method, boxes_num, pts_num, channels, rois_desc.desc(),
+      rois_ptr, pts_desc.desc(), pts_ptr, pts_feature_desc.desc(),
+      pts_feature_ptr, workspace_ptr, workspace_size, max_pts_each_voxel, out_x,
+      out_y, out_z, argmax_desc.desc(), argmax_ptr,
+      pts_idx_of_voxels_desc.desc(), pts_idx_of_voxels_ptr,
+      pooled_features_desc.desc(), pooled_features_ptr));
 }

 void roiaware_pool3d_forward_mlu(int boxes_num, int pts_num, int channels,
@@ -135,11 +135,11 @@ void RoiawarePool3dBackwardMLUKernelLauncher(
   auto grad_in_ptr = grad_in_impl->cnnlMalloc();

   CNLOG(INFO) << "Call mluOpRoiawarePool3dBackward().";
-  mluOpRoiawarePool3dBackward(
-      handle, pool_method, boxes_num, out_x, out_y, out_z, channels,
-      max_pts_each_voxel, pts_idx_of_voxels_desc.desc(), pts_idx_of_voxels_ptr,
-      argmax_desc.desc(), argmax_ptr, grad_out_desc.desc(), grad_out_ptr,
-      grad_in_desc.desc(), grad_in_ptr);
+  TORCH_MLUOP_CHECK(mluOpRoiawarePool3dBackward(
+      handle, pool_method, boxes_num, out_x, out_y, out_z, channels,
+      max_pts_each_voxel, pts_idx_of_voxels_desc.desc(), pts_idx_of_voxels_ptr,
+      argmax_desc.desc(), argmax_ptr, grad_out_desc.desc(), grad_out_ptr,
+      grad_in_desc.desc(), grad_in_ptr));
 }

 void roiaware_pool3d_backward_mlu(int boxes_num, int out_x, int out_y,
...
@@ -40,9 +40,9 @@ void RotatedFeatureAlignForwardMLUKernelLauncher(const Tensor features,
   // get compute handle
   auto handle = mluOpGetCurrentHandle();
-  mluOpRotatedFeatureAlignForward(
-      handle, features_desc.desc(), features_ptr, best_bboxes_desc.desc(),
-      best_bboxes_ptr, spatial_scale, points, output_desc.desc(), output_ptr);
+  TORCH_MLUOP_CHECK(mluOpRotatedFeatureAlignForward(
+      handle, features_desc.desc(), features_ptr, best_bboxes_desc.desc(),
+      best_bboxes_ptr, spatial_scale, points, output_desc.desc(), output_ptr));
   output.copy_(output_contiguous);
 }
@@ -76,10 +76,10 @@ void RotatedFeatureAlignBackwardMLUKernelLauncher(const Tensor top_grad,
   // get compute handle
   auto handle = mluOpGetCurrentHandle();
-  mluOpRotatedFeatureAlignBackward(handle, top_grad_desc.desc(), top_grad_ptr,
-                                   best_bboxes_desc.desc(), best_bboxes_ptr,
-                                   spatial_scale, points,
-                                   bottom_grad_desc.desc(), bottom_grad_ptr);
+  TORCH_MLUOP_CHECK(mluOpRotatedFeatureAlignBackward(handle, top_grad_desc.desc(), top_grad_ptr,
+                                                     best_bboxes_desc.desc(), best_bboxes_ptr,
+                                                     spatial_scale, points,
+                                                     bottom_grad_desc.desc(), bottom_grad_ptr));
   bottom_grad.copy_(bottom_grad_);
 }
...
@@ -49,20 +49,20 @@ std::vector<Tensor> dynamic_point_to_voxel_forward_mlu(
   auto handle = mluOpGetCurrentHandle();
   size_t workspace_size;
-  mluOpGetDynamicPointToVoxelForwardWorkspaceSize(
-      handle, feats_desc.desc(), coors_desc.desc(), &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetDynamicPointToVoxelForwardWorkspaceSize(
+      handle, feats_desc.desc(), coors_desc.desc(), &workspace_size));
   auto workspace_tensor =
       at::empty(workspace_size, feats.options().dtype(at::kByte));
   INITIAL_MLU_PARAM_WITH_TENSOR(workspace_tensor);

   // launch kernel
-  mluOpDynamicPointToVoxelForward(
-      handle, mlu_reduce_type, feats_desc.desc(), feats_ptr, coors_desc.desc(),
-      coors_ptr, workspace_tensor_ptr, workspace_size,
-      reduced_feats_desc.desc(), reduced_feats_ptr, out_coors_desc.desc(),
-      out_coors_ptr, coors_map_desc.desc(), coors_map_ptr,
-      reduce_count_desc.desc(), reduce_count_ptr, voxel_num_desc.desc(),
-      voxel_num_ptr);
+  TORCH_MLUOP_CHECK(mluOpDynamicPointToVoxelForward(
+      handle, mlu_reduce_type, feats_desc.desc(), feats_ptr, coors_desc.desc(),
+      coors_ptr, workspace_tensor_ptr, workspace_size,
+      reduced_feats_desc.desc(), reduced_feats_ptr, out_coors_desc.desc(),
+      out_coors_ptr, coors_map_desc.desc(), coors_map_ptr,
+      reduce_count_desc.desc(), reduce_count_ptr, voxel_num_desc.desc(),
+      voxel_num_ptr));

   int voxel_num_value = *static_cast<int *>(voxel_num.cpu().data_ptr());
   TORCH_CHECK(voxel_num_value <= feats.size(0),
@@ -124,22 +124,22 @@ void dynamic_point_to_voxel_backward_mlu(
   auto handle = mluOpGetCurrentHandle();
   size_t workspace_size;
-  mluOpGetDynamicPointToVoxelBackwardWorkspaceSize(
-      handle, mlu_reduce_type, grad_feats_desc.desc(), feats_desc.desc(),
-      grad_reduced_feats_desc.desc(), coors_idx_desc.desc(),
-      reduce_count_desc.desc(), voxel_num_desc.desc(), &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetDynamicPointToVoxelBackwardWorkspaceSize(
+      handle, mlu_reduce_type, grad_feats_desc.desc(), feats_desc.desc(),
+      grad_reduced_feats_desc.desc(), coors_idx_desc.desc(),
+      reduce_count_desc.desc(), voxel_num_desc.desc(), &workspace_size));
   auto workspace_tensor =
       at::empty(workspace_size, feats.options().dtype(at::kByte));
   INITIAL_MLU_PARAM_WITH_TENSOR(workspace_tensor);

   // launch kernel
-  mluOpDynamicPointToVoxelBackward(
-      handle, mlu_reduce_type, grad_reduced_feats_desc.desc(),
-      grad_reduced_feats_ptr, feats_desc.desc(), feats_ptr,
-      reduced_feats_desc.desc(), reduced_feats_ptr, coors_idx_desc.desc(),
-      coors_idx_ptr, reduce_count_desc.desc(), reduce_count_ptr,
-      voxel_num_desc.desc(), voxel_num_ptr, workspace_tensor_ptr,
-      workspace_size, grad_feats_desc.desc(), grad_feats_ptr);
+  TORCH_MLUOP_CHECK(mluOpDynamicPointToVoxelBackward(
+      handle, mlu_reduce_type, grad_reduced_feats_desc.desc(),
+      grad_reduced_feats_ptr, feats_desc.desc(), feats_ptr,
+      reduced_feats_desc.desc(), reduced_feats_ptr, coors_idx_desc.desc(),
+      coors_idx_ptr, reduce_count_desc.desc(), reduce_count_ptr,
+      voxel_num_desc.desc(), voxel_num_ptr, workspace_tensor_ptr,
+      workspace_size, grad_feats_desc.desc(), grad_feats_ptr));
 }

 std::vector<Tensor> dynamic_point_to_voxel_forward_impl(
...
@@ -86,31 +86,31 @@ std::vector<torch::Tensor> GetIndicePairsForwardMLUKernelLauncher(
     mluOpDataType_t dtype = MLUOP_DTYPE_INT32;
     std::vector<int> dims;
     dims = {numAct, coorDim + 1};
-    mluOpSetTensorDescriptor(indices_desc.desc(), layout, dtype, dims.size(),
-                             dims.data());
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(indices_desc.desc(), layout, dtype, dims.size(),
+                                               dims.data()));
     dims = {kernelVolume, 2, numAct};
-    mluOpSetTensorDescriptor(indicePairs_desc.desc(), layout, dtype,
-                             dims.size(), dims.data());
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(indicePairs_desc.desc(), layout, dtype,
+                                               dims.size(), dims.data()));
     dims = {kernelVolume};
-    mluOpSetTensorDescriptor(indiceNum_desc.desc(), layout, dtype, dims.size(),
-                             dims.data());
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(indiceNum_desc.desc(), layout, dtype, dims.size(),
+                                               dims.data()));
     dims = {out_size, coorDim + 1};
-    mluOpSetTensorDescriptor(out_indices_desc.desc(), layout, dtype,
-                             dims.size(), dims.data());
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(out_indices_desc.desc(), layout, dtype,
+                                               dims.size(), dims.data()));
   }
   mluOpSparseConvolutionDescriptor_t sparse_conv_desc;
-  mluOpCreateSparseConvolutionDescriptor(&sparse_conv_desc);
-  mluOpSetSparseConvolutionDescriptor(
-      sparse_conv_desc, NDim + 2, batch, padding32.data(), stride32.data(),
-      dilation32.data(), input_space.data(), filter_space.data(),
-      output_space.data(), sub_m, transpose, 0);
+  TORCH_MLUOP_CHECK(mluOpCreateSparseConvolutionDescriptor(&sparse_conv_desc));
+  TORCH_MLUOP_CHECK(mluOpSetSparseConvolutionDescriptor(
+      sparse_conv_desc, NDim + 2, batch, padding32.data(), stride32.data(),
+      dilation32.data(), input_space.data(), filter_space.data(),
+      output_space.data(), sub_m, transpose, 0));

   auto handle = mluOpGetCurrentHandle();
   size_t workspace_size = 0;
-  mluOpGetIndicePairsWorkspaceSize(
-      handle, sparse_conv_desc, indices_desc.desc(), indicePairs_desc.desc(),
-      out_indices_desc.desc(), indiceNum_desc.desc(), &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetIndicePairsWorkspaceSize(
+      handle, sparse_conv_desc, indices_desc.desc(), indicePairs_desc.desc(),
+      out_indices_desc.desc(), indiceNum_desc.desc(), &workspace_size));
   auto indice_workspace_size =
       at::empty(workspace_size, indices.options().dtype(at::kByte));
@@ -127,14 +127,14 @@ std::vector<torch::Tensor> GetIndicePairsForwardMLUKernelLauncher(
   auto indiceNum_ptr = indiceNum_impl->cnnlMalloc();
   auto indice_workspace_ptr = indice_workspace_impl->cnnlMalloc();
-  mluOpGetIndicePairs(handle, sparse_conv_desc, indices_desc.desc(),
-                      indices_ptr, indice_workspace_ptr, workspace_size,
-                      indicePairs_desc.desc(), indicePairs_ptr,
-                      out_indices_desc.desc(), out_indices_ptr,
-                      indiceNum_desc.desc(), indiceNum_ptr);
+  TORCH_MLUOP_CHECK(mluOpGetIndicePairs(handle, sparse_conv_desc, indices_desc.desc(),
+                                        indices_ptr, indice_workspace_ptr, workspace_size,
+                                        indicePairs_desc.desc(), indicePairs_ptr,
+                                        out_indices_desc.desc(), out_indices_ptr,
+                                        indiceNum_desc.desc(), indiceNum_ptr));
   int num_act_out = 0;
-  mluOpGetSparseConvolutionNumActOut(sparse_conv_desc, &num_act_out);
-  mluOpDestroySparseConvolutionDescriptor(sparse_conv_desc);
+  TORCH_MLUOP_CHECK(mluOpGetSparseConvolutionNumActOut(sparse_conv_desc, &num_act_out));
+  TORCH_MLUOP_CHECK(mluOpDestroySparseConvolutionDescriptor(sparse_conv_desc));
   if (!sub_m) {
     return {out_indices.slice(0, 0, num_act_out), indicePairs, indiceNum};
   } else {
@@ -179,33 +179,33 @@ torch::Tensor IndiceConvForwardMLUKernelLauncher(
     int dims[8];

     // features_desc
-    mluOpGetTensorDescriptor(features_desc.desc(), &layout, &dtype, &dim, dims);
-    mluOpSetTensorDescriptor(features_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
-                             dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(features_desc.desc(), &layout, &dtype, &dim, dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(features_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
+                                               dim, dims));

     // filters_desc
-    mluOpGetTensorDescriptor(filters_desc.desc(), &layout, &dtype, &dim, dims);
-    mluOpSetTensorDescriptor(filters_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
-                             dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(filters_desc.desc(), &layout, &dtype, &dim, dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(filters_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
+                                               dim, dims));

     // indice_pairs_desc
-    mluOpGetTensorDescriptor(indice_pairs_desc.desc(), &layout, &dtype, &dim,
-                             dims);
-    mluOpSetTensorDescriptor(indice_pairs_desc.desc(), MLUOP_LAYOUT_ARRAY,
-                             dtype, dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(indice_pairs_desc.desc(), &layout, &dtype, &dim,
+                                               dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(indice_pairs_desc.desc(), MLUOP_LAYOUT_ARRAY,
+                                               dtype, dim, dims));

     // output_desc
-    mluOpGetTensorDescriptor(output_desc.desc(), &layout, &dtype, &dim, dims);
-    mluOpSetTensorDescriptor(output_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype, dim,
-                             dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(output_desc.desc(), &layout, &dtype, &dim, dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(output_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype, dim,
+                                               dims));
   }

   auto handle = mluOpGetCurrentHandle();
   size_t workspace_size = 0;
-  mluOpGetIndiceConvolutionForwardWorkspaceSize(
-      handle, features_desc.desc(), filters_desc.desc(),
-      indice_pairs_desc.desc(), output_desc.desc(), indice_num, numActOut,
-      _inverse, _subM, &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetIndiceConvolutionForwardWorkspaceSize(
+      handle, features_desc.desc(), filters_desc.desc(),
+      indice_pairs_desc.desc(), output_desc.desc(), indice_num, numActOut,
+      _inverse, _subM, &workspace_size));
   auto workspace =
       at::empty(workspace_size, features.options().dtype(at::kByte));
// outputs // outputs
auto output_impl = torch_mlu::getMluTensorImpl(output); auto output_impl = torch_mlu::getMluTensorImpl(output);
auto output_ptr = output_impl->cnnlMalloc(); auto output_ptr = output_impl->cnnlMalloc();
mluOpIndiceConvolutionForward( TORCH_MLUOP_CHECK(mluOpIndiceConvolutionForward(
handle, features_desc.desc(), features_ptr, filters_desc.desc(), handle, features_desc.desc(), features_ptr, filters_desc.desc(),
filters_ptr, indice_pairs_desc.desc(), indice_pairs_ptr, indice_num, filters_ptr, indice_pairs_desc.desc(), indice_pairs_ptr, indice_num,
numActOut, _inverse, _subM, workspace_ptr, workspace_size, numActOut, _inverse, _subM, workspace_ptr, workspace_size,
output_desc.desc(), output_ptr); output_desc.desc(), output_ptr));
return output; return output;
} }
@@ -290,37 +290,37 @@ std::vector<torch::Tensor> IndiceConvBackwardMLUKernelLauncher(
     int dims[8];

     // features_desc
-    mluOpGetTensorDescriptor(features_desc.desc(), &layout, &dtype, &dim, dims);
-    mluOpSetTensorDescriptor(features_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
-                             dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(features_desc.desc(), &layout, &dtype, &dim, dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(features_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
+                                               dim, dims));

     // filters_desc
-    mluOpGetTensorDescriptor(filters_desc.desc(), &layout, &dtype, &dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(filters_desc.desc(), &layout, &dtype, &dim, dims));
     if (dim == 4) {
-      mluOpSetTensorDescriptor(filters_desc.desc(), MLUOP_LAYOUT_HWCN, dtype,
-                               dim, dims);
+      TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(filters_desc.desc(), MLUOP_LAYOUT_HWCN, dtype,
+                                                 dim, dims));
     } else {
-      mluOpSetTensorDescriptor(filters_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
-                               dim, dims);
+      TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(filters_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
+                                                 dim, dims));
     }

     // output_grad_desc
-    mluOpGetTensorDescriptor(output_grad_desc.desc(), &layout, &dtype, &dim,
-                             dims);
-    mluOpSetTensorDescriptor(output_grad_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
-                             dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(output_grad_desc.desc(), &layout, &dtype, &dim,
+                                               dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(output_grad_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
+                                               dim, dims));

     // indice_pairs_desc
-    mluOpGetTensorDescriptor(indice_pairs_desc.desc(), &layout, &dtype, &dim,
-                             dims);
-    mluOpSetTensorDescriptor(indice_pairs_desc.desc(), MLUOP_LAYOUT_ARRAY,
-                             dtype, dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(indice_pairs_desc.desc(), &layout, &dtype, &dim,
+                                               dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(indice_pairs_desc.desc(), MLUOP_LAYOUT_ARRAY,
+                                               dtype, dim, dims));

     // input_grad_desc
-    mluOpGetTensorDescriptor(input_grad_desc.desc(), &layout, &dtype, &dim,
-                             dims);
-    mluOpSetTensorDescriptor(input_grad_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
-                             dim, dims);
+    TORCH_MLUOP_CHECK(mluOpGetTensorDescriptor(input_grad_desc.desc(), &layout, &dtype, &dim,
+                                               dims));
+    TORCH_MLUOP_CHECK(mluOpSetTensorDescriptor(input_grad_desc.desc(), MLUOP_LAYOUT_ARRAY, dtype,
+                                               dim, dims));
   }

   auto handle = mluOpGetCurrentHandle();
@@ -331,10 +331,10 @@ std::vector<torch::Tensor> IndiceConvBackwardMLUKernelLauncher(
       &data_workspace_size);
   size_t filters_workspace_size = 0;
-  mluOpGetIndiceConvolutionBackwardFilterWorkspaceSize(
-      handle, features_desc.desc(), output_grad_desc.desc(),
-      indice_pairs_desc.desc(), filters_grad_desc.desc(), indice_num, _inverse,
-      _subM, &filters_workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetIndiceConvolutionBackwardFilterWorkspaceSize(
+      handle, features_desc.desc(), output_grad_desc.desc(),
+      indice_pairs_desc.desc(), filters_grad_desc.desc(), indice_num, _inverse,
+      _subM, &filters_workspace_size));
   auto indice_convbpdata_workspace =
       at::empty(data_workspace_size, features.options().dtype(at::kByte));
@@ -365,17 +365,17 @@ std::vector<torch::Tensor> IndiceConvBackwardMLUKernelLauncher(
   auto filters_grad_impl = torch_mlu::getMluTensorImpl(filters_grad);
   auto filters_grad_ptr = filters_grad_impl->cnnlMalloc();
-  mluOpIndiceConvolutionBackwardData(
-      handle, output_grad_desc.desc(), output_grad_ptr, filters_desc.desc(),
-      filters_ptr, indice_pairs_desc.desc(), indice_pairs_ptr, indice_num,
-      _inverse, _subM, indice_convbpdata_workspace_ptr, data_workspace_size,
-      input_grad_desc.desc(), input_grad_ptr);
+  TORCH_MLUOP_CHECK(mluOpIndiceConvolutionBackwardData(
+      handle, output_grad_desc.desc(), output_grad_ptr, filters_desc.desc(),
+      filters_ptr, indice_pairs_desc.desc(), indice_pairs_ptr, indice_num,
+      _inverse, _subM, indice_convbpdata_workspace_ptr, data_workspace_size,
+      input_grad_desc.desc(), input_grad_ptr));
-  mluOpIndiceConvolutionBackwardFilter(
-      handle, features_desc.desc(), features_ptr, output_grad_desc.desc(),
-      output_grad_ptr, indice_pairs_desc.desc(), indice_pairs_ptr, indice_num,
-      _inverse, _subM, indice_convbpfilter_workspace_ptr,
-      filters_workspace_size, filters_grad_desc.desc(), filters_grad_ptr);
+  TORCH_MLUOP_CHECK(mluOpIndiceConvolutionBackwardFilter(
+      handle, features_desc.desc(), features_ptr, output_grad_desc.desc(),
+      output_grad_ptr, indice_pairs_desc.desc(), indice_pairs_ptr, indice_num,
+      _inverse, _subM, indice_convbpfilter_workspace_ptr,
+      filters_workspace_size, filters_grad_desc.desc(), filters_grad_ptr));
   std::vector<torch::Tensor> result;
   result.push_back(input_grad);
...
@@ -30,8 +30,8 @@ void ThreeNNMLUKernelLauncher(int b, int n, int m, const Tensor unknown,
   auto handle = mluOpGetCurrentHandle();
   size_t workspace_size = 0;
-  mluOpGetThreeNNForwardWorkspaceSize(handle, known_desc.desc(),
-                                      &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetThreeNNForwardWorkspaceSize(handle, known_desc.desc(),
+                                                        &workspace_size));
   auto known_workspace =
       at::empty(workspace_size, known.options().dtype(at::kByte));
@@ -46,10 +46,10 @@ void ThreeNNMLUKernelLauncher(int b, int n, int m, const Tensor unknown,
   auto idx_ptr = idx_impl->cnnlMalloc();
   auto workspace_ptr = workspace_impl->cnnlMalloc();
-  mluOpThreeNNForward(handle, unknown_desc.desc(), unknown_ptr,
-                      known_desc.desc(), known_ptr, workspace_ptr,
-                      workspace_size, dist2_desc.desc(), dist2_ptr,
-                      idx_desc.desc(), idx_ptr);
+  TORCH_MLUOP_CHECK(mluOpThreeNNForward(handle, unknown_desc.desc(), unknown_ptr,
+                                        known_desc.desc(), known_ptr, workspace_ptr,
+                                        workspace_size, dist2_desc.desc(), dist2_ptr,
+                                        idx_desc.desc(), idx_ptr));
 }

 void three_nn_forward_mlu(int b, int n, int m, const Tensor unknown,
...
@@ -53,23 +53,23 @@ int HardVoxelizeForwardMLUKernelLauncher(
   size_t workspace_size;
   auto handle = mluOpGetCurrentHandle();
-  mluOpGetVoxelizationWorkspaceSize(
-      handle, points_desc.desc(), voxel_size_tensor_desc.desc(),
-      coors_range_tensor_desc.desc(), max_points, max_voxels, NDim, true,
-      voxels_desc.desc(), coors_desc.desc(), num_points_per_voxel_desc.desc(),
-      voxel_num_tensor_desc.desc(), &workspace_size);
+  TORCH_MLUOP_CHECK(mluOpGetVoxelizationWorkspaceSize(
+      handle, points_desc.desc(), voxel_size_tensor_desc.desc(),
+      coors_range_tensor_desc.desc(), max_points, max_voxels, NDim, true,
+      voxels_desc.desc(), coors_desc.desc(), num_points_per_voxel_desc.desc(),
+      voxel_num_tensor_desc.desc(), &workspace_size));
   auto workspace_tensor =
       at::empty(workspace_size, points.options().dtype(at::kByte));
   INITIAL_MLU_PARAM_WITH_TENSOR(workspace_tensor);
-  mluOpVoxelization(handle, points_desc.desc(), points_ptr,
-                    voxel_size_tensor_desc.desc(), voxel_size_tensor_ptr,
-                    coors_range_tensor_desc.desc(), coors_range_tensor_ptr,
-                    max_points, max_voxels, NDim, true, workspace_tensor_ptr,
-                    workspace_size, voxels_desc.desc(), voxels_ptr,
-                    coors_desc.desc(), coors_ptr,
-                    num_points_per_voxel_desc.desc(), num_points_per_voxel_ptr,
-                    voxel_num_tensor_desc.desc(), voxel_num_tensor_ptr);
+  TORCH_MLUOP_CHECK(mluOpVoxelization(handle, points_desc.desc(), points_ptr,
+                                      voxel_size_tensor_desc.desc(), voxel_size_tensor_ptr,
+                                      coors_range_tensor_desc.desc(), coors_range_tensor_ptr,
+                                      max_points, max_voxels, NDim, true, workspace_tensor_ptr,
+                                      workspace_size, voxels_desc.desc(), voxels_ptr,
+                                      coors_desc.desc(), coors_ptr,
+                                      num_points_per_voxel_desc.desc(), num_points_per_voxel_ptr,
+                                      voxel_num_tensor_desc.desc(), voxel_num_tensor_ptr));
   auto voxel_num_cpu = voxel_num_tensor.to(at::kCPU);
   int voxel_num_int = voxel_num_cpu.data_ptr<int>()[0];
   return voxel_num_int;