Unverified commit c0268ad9, authored by sherie and committed by GitHub
Browse files

Support torch_npu 2.1 (#2909)

parent 6299bc02
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#ifndef PYTORCH_NPU_HELPER_HPP_ #ifndef PYTORCH_NPU_HELPER_HPP_
#define PYTORCH_NPU_HELPER_HPP_ #define PYTORCH_NPU_HELPER_HPP_
#include <torch_npu/csrc/aten/NPUNativeFunctions.h> #include <torch_npu/csrc/aten/CustomFunctions.h>
#include <torch_npu/csrc/framework/utils/CalcuOpUtil.h> #include <torch_npu/csrc/framework/utils/CalcuOpUtil.h>
#include <torch_npu/csrc/framework/utils/OpAdapter.h> #include <torch_npu/csrc/framework/utils/OpAdapter.h>
......
...@@ -20,16 +20,16 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, ...@@ -20,16 +20,16 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
bboxesFP32 = bboxes1; bboxesFP32 = bboxes1;
gtboxesFP32 = bboxes2; gtboxesFP32 = bboxes2;
} }
if (bboxes2.scalar_type() != at::ScalarType::Float) { if (bboxes2.scalar_type() != at::kFloat) {
bboxesFP32 = NPUNativeFunctions::npu_dtype_cast(bboxesFP32, at::kFloat); bboxesFP32 = bboxesFP32.to(at::kFloat);
gtboxesFP32 = NPUNativeFunctions::npu_dtype_cast(gtboxesFP32, at::kFloat); gtboxesFP32 = gtboxesFP32.to(at::kFloat);
} }
c10::SmallVector<int64_t, SIZE> iousSize = {gtboxesFP32.size(0), c10::SmallVector<int64_t, SIZE> iousSize = {gtboxesFP32.size(0),
bboxesFP32.size(0)}; bboxesFP32.size(0)};
if (aligned) { if (aligned) {
iousSize = {gtboxesFP32.size(0), 1}; iousSize = {gtboxesFP32.size(0), 1};
} }
at::Tensor iousFP32 = OpPreparation::ApplyTensor(bboxesFP32, iousSize); at::Tensor iousFP32 = at::empty(iousSize, bboxesFP32.options());
bboxesFP32 = aligned ? bboxesFP32.transpose(0, 1) : bboxesFP32; bboxesFP32 = aligned ? bboxesFP32.transpose(0, 1) : bboxesFP32;
gtboxesFP32 = aligned ? gtboxesFP32.transpose(0, 1) : gtboxesFP32; gtboxesFP32 = aligned ? gtboxesFP32.transpose(0, 1) : gtboxesFP32;
OpCommand cmd; OpCommand cmd;
...@@ -41,8 +41,8 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, ...@@ -41,8 +41,8 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
.Attr("eps", (float)offset) .Attr("eps", (float)offset)
.Attr("aligned", aligned) .Attr("aligned", aligned)
.Run(); .Run();
if (bboxes2.scalar_type() != at::ScalarType::Float) { if (bboxes2.scalar_type() != at::kFloat) {
iousFP32 = NPUNativeFunctions::npu_dtype_cast(iousFP32, at::kHalf); iousFP32 = iousFP32.to(at::kHalf);
} }
iousFP32 = swap_flag ? iousFP32.transpose(0, 1) : iousFP32; iousFP32 = swap_flag ? iousFP32.transpose(0, 1) : iousFP32;
ious.copy_(iousFP32); ious.copy_(iousFP32);
......
...@@ -12,15 +12,13 @@ void sigmoid_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight, ...@@ -12,15 +12,13 @@ void sigmoid_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
target_y = at::mul(target_y, -1.0); target_y = at::mul(target_y, -1.0);
target_y = at::add(target_y, 1.0); target_y = at::add(target_y, 1.0);
} else { } else {
target_y = at_npu::native::NPUNativeFunctions::one_hot(target, n_class); target_y = at::one_hot(target, n_class);
} }
target_y = target_y = target_y.to(at::kInt);
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
int64_t weight_size = weight.size(0); int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input); at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) { if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight, weight_y = at::broadcast_to(weight, input.sizes());
input.sizes());
} }
OpCommand cmd; OpCommand cmd;
string reduction = "none"; string reduction = "none";
...@@ -46,18 +44,16 @@ void sigmoid_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight, ...@@ -46,18 +44,16 @@ void sigmoid_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight,
if (n_class == 1) { if (n_class == 1) {
target_y = at::reshape(target, input.sizes()); target_y = at::reshape(target, input.sizes());
} else { } else {
target_y = at_npu::native::NPUNativeFunctions::one_hot(target, n_class); target_y = at::one_hot(target, n_class);
target_y = at::mul(target_y, -1.0); target_y = at::mul(target_y, -1.0);
target_y = at::add(target_y, 1.0); target_y = at::add(target_y, 1.0);
} }
target_y = target_y = target_y.to(at::kInt);
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
at::Tensor grad_up = at::ones_like(input); at::Tensor grad_up = at::ones_like(input);
int64_t weight_size = weight.size(0); int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input); at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) { if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight, weight_y = at::broadcast_to(weight, input.sizes());
input.sizes());
} }
OpCommand cmd; OpCommand cmd;
string reduction = "none"; string reduction = "none";
...@@ -80,15 +76,12 @@ void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target, ...@@ -80,15 +76,12 @@ void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight, void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
Tensor output, float gamma, float alpha) { Tensor output, float gamma, float alpha) {
int64_t n_class = input.size(1); int64_t n_class = input.size(1);
at::Tensor target_y = at::Tensor target_y = at::one_hot(target, n_class);
at_npu::native::NPUNativeFunctions::one_hot(target, n_class); target_y = target_y.to(at::kInt);
target_y =
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
int64_t weight_size = weight.size(0); int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input); at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) { if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight, weight_y = at::broadcast_to(weight, input.sizes());
input.sizes());
} }
at::Tensor op_output = at::ones_like(input); at::Tensor op_output = at::ones_like(input);
OpCommand cmd; OpCommand cmd;
...@@ -107,8 +100,7 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight, ...@@ -107,8 +100,7 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
c10::SmallVector<int64_t, 2> sizes = {n_batch, 1}; c10::SmallVector<int64_t, 2> sizes = {n_batch, 1};
at::IntArrayRef offset = at::IntArrayRef(offsets); at::IntArrayRef offset = at::IntArrayRef(offsets);
at::IntArrayRef size = at::IntArrayRef(sizes); at::IntArrayRef size = at::IntArrayRef(sizes);
at_npu::native::NPUNativeFunctions::npu_slice_out(op_output, offset, size, at_npu::native::custom_ops::npu_slice_out(op_output, offset, size, output);
output);
} }
void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
...@@ -119,16 +111,13 @@ void softmax_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight, ...@@ -119,16 +111,13 @@ void softmax_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight,
Tensor buff, Tensor grad_input, Tensor buff, Tensor grad_input,
float gamma, float alpha) { float gamma, float alpha) {
int64_t n_class = input.size(1); int64_t n_class = input.size(1);
at::Tensor target_y = at::Tensor target_y = at::one_hot(target, n_class);
at_npu::native::NPUNativeFunctions::one_hot(target, n_class); target_y = target_y.to(at::kInt);
target_y =
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
at::Tensor grad_up = at::ones_like(input); at::Tensor grad_up = at::ones_like(input);
int64_t weight_size = weight.size(0); int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input); at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) { if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight, weight_y = at::broadcast_to(weight, input.sizes());
input.sizes());
} }
OpCommand cmd; OpCommand cmd;
string reduction = "none"; string reduction = "none";
......
...@@ -25,8 +25,9 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias, ...@@ -25,8 +25,9 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
} }
} }
at::Tensor bias_tmp = at::reshape(bias, input_size_tmp); at::Tensor bias_tmp = at::reshape(bias, input_size_tmp);
at::Tensor bias_ = at_npu::native::NPUNativeFunctions::npu_broadcast( // at::Tensor bias_ = at_npu::native::NPUNativeFunctions::npu_broadcast(
bias_tmp, input.sizes()); // bias_tmp, input.sizes());
at::Tensor bias_ = at::broadcast_to(bias_tmp, input.sizes());
OpCommand cmd; OpCommand cmd;
cmd.Name("FusedBiasLeakyRelu") cmd.Name("FusedBiasLeakyRelu")
.Input(input) .Input(input)
......
...@@ -7,20 +7,16 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) { ...@@ -7,20 +7,16 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
TORCH_CHECK((boxes.scalar_type() == at::ScalarType::Float), TORCH_CHECK((boxes.scalar_type() == at::ScalarType::Float),
"The type of boxes tensor passed in nms_npu should be float"); "The type of boxes tensor passed in nms_npu should be float");
int64_t offset_64 = offset; int64_t offset_64 = offset;
at::Tensor iou_threshold_y = at_npu::native::OpPreparation::ApplyTensor( at::Tensor iou_threshold_y =
{}, boxes.options().dtype(at::kFloat), boxes) at::empty({}, boxes.options().dtype(at::kFloat)).fill_(iou_threshold);
.fill_(iou_threshold);
at::Tensor scores_threshold_y = at::Tensor scores_threshold_y =
at_npu::native::OpPreparation::ApplyTensor( at::empty({}, boxes.options().dtype(at::kFloat)).fill_(0);
{}, boxes.options().dtype(at::kFloat), boxes) at::Tensor max_outputsize_y =
.fill_(0); at::empty({}, boxes.options().dtype(at::kInt)).fill_(boxes.size(0));
at::Tensor max_outputsize_y = at_npu::native::OpPreparation::ApplyTensor(
{}, boxes.options().dtype(at::kInt), boxes)
.fill_(boxes.size(0));
c10::SmallVector<int64_t, SIZE> outputsize = {boxes.size(0)}; c10::SmallVector<int64_t, SIZE> outputsize = {boxes.size(0)};
at::Tensor output = at_npu::native::OpPreparation::ApplyTensor( at::Tensor output =
outputsize, boxes.options().dtype(at::kInt), boxes) at::empty(outputsize, boxes.options().dtype(at::kInt)).fill_(-1);
.fill_(-1);
OpCommand cmd; OpCommand cmd;
cmd.Name("NonMaxSuppressionV3") cmd.Name("NonMaxSuppressionV3")
.Input(boxes) .Input(boxes)
...@@ -32,11 +28,10 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) { ...@@ -32,11 +28,10 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
.Output(output) .Output(output)
.Run(); .Run();
auto outputsizeBool = at::gt(output, -1); auto outputsizeBool = at::gt(output, -1);
auto outputsizeInt = outputsizeBool.to(at::ScalarType::Int); auto outputsizeInt = outputsizeBool.to(at::kInt);
auto countLen = at::sum(outputsizeInt, at::ScalarType::Int); auto countLen = at::sum(outputsizeInt, at::kInt);
at::Tensor actual_output = output.slice(0, 0, countLen.item().toLong()); at::Tensor actual_output = output.slice(0, 0, countLen.item().toLong());
actual_output = at_npu::native::NPUNativeFunctions::npu_dtype_cast( actual_output = actual_output.to(at::kLong);
actual_output, at::kLong);
return actual_output; return actual_output;
} }
......
...@@ -7,14 +7,15 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores, ...@@ -7,14 +7,15 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
auto originDtype = dets.scalar_type(); auto originDtype = dets.scalar_type();
at::Tensor detsCast = dets; at::Tensor detsCast = dets;
at::Tensor scoresCast = scores; at::Tensor scoresCast = scores;
if (originDtype != at::ScalarType::Float) { if (originDtype != at::kFloat) {
detsCast = NPUNativeFunctions::npu_dtype_cast(dets, at::kFloat); detsCast = detsCast.to(at::kFloat);
scoresCast = NPUNativeFunctions::npu_dtype_cast(scores, at::kFloat); scoresCast = scoresCast.to(at::kFloat);
} }
c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)}; c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)};
at::Tensor selectedBox = OpPreparation::ApplyTensor(dets);
at::Tensor selectedIndex = OpPreparation::ApplyTensor( at::Tensor selectedBox = at::empty_like(dets);
selectedIndexSize, dets.options().dtype(at::kInt), dets); at::Tensor selectedIndex =
at::empty(selectedIndexSize, dets.options().dtype(at::kInt));
c10::SmallVector<int64_t, N> output_sync_idx = {0, 1}; c10::SmallVector<int64_t, N> output_sync_idx = {0, 1};
OpCommand cmd; OpCommand cmd;
...@@ -27,6 +28,6 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores, ...@@ -27,6 +28,6 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
.Output(selectedIndex) .Output(selectedIndex)
.Attr("iou_threshold", (float)iou_threshold) .Attr("iou_threshold", (float)iou_threshold)
.Run(); .Run();
selectedIndex = NPUNativeFunctions::npu_dtype_cast(selectedIndex, at::kLong); selectedIndex = selectedIndex.to(at::kLong);
return selectedIndex; return selectedIndex;
} }
...@@ -42,7 +42,7 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y, ...@@ -42,7 +42,7 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
roi_end_mode = 0; roi_end_mode = 0;
} }
c10::SmallVector<int64_t, SIZE> xdiff_shape = c10::SmallVector<int64_t, SIZE> xdiff_shape =
at_npu::native::array_to_small_vector(grad_input.sizes()); array_to_small_vector(grad_input.sizes());
OpCommand cmd; OpCommand cmd;
cmd.Name("ROIAlignGrad") cmd.Name("ROIAlignGrad")
.Input(grad_output) .Input(grad_output)
......
...@@ -9,8 +9,8 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output, ...@@ -9,8 +9,8 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
int64_t pooled_height_64 = pooled_height; int64_t pooled_height_64 = pooled_height;
int64_t pooled_width_64 = pooled_width; int64_t pooled_width_64 = pooled_width;
int64_t pooled_channel = 1; int64_t pooled_channel = 1;
at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor( at::Tensor roi_actual_num =
{}, rois.options().dtype(at::kInt), rois); at::empty_like(rois, rois.options().dtype(at::kInt));
if (input.sizes()[1] % 16 == 0) { if (input.sizes()[1] % 16 == 0) {
OpCommand cmd; OpCommand cmd;
cmd.Name("RoiPoolingWithArgMax") cmd.Name("RoiPoolingWithArgMax")
...@@ -50,8 +50,8 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax, ...@@ -50,8 +50,8 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
int64_t pooled_height_64 = pooled_height; int64_t pooled_height_64 = pooled_height;
int64_t pooled_width_64 = pooled_width; int64_t pooled_width_64 = pooled_width;
int64_t pooled_channel = 1; int64_t pooled_channel = 1;
at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor( at::Tensor roi_actual_num =
{}, rois.options().dtype(at::kInt), rois); at::empty_like(rois, rois.options().dtype(at::kInt));
at::Tensor x = at::ones_like(grad_input); at::Tensor x = at::ones_like(grad_input);
OpCommand cmd; OpCommand cmd;
cmd.Name("RoiPoolingGradWithArgMax") cmd.Name("RoiPoolingGradWithArgMax")
......
...@@ -19,8 +19,7 @@ int hard_voxelize_forward_npu(const at::Tensor &points, at::Tensor &voxels, ...@@ -19,8 +19,7 @@ int hard_voxelize_forward_npu(const at::Tensor &points, at::Tensor &voxels,
const int max_points, const int max_voxels, const int max_points, const int max_voxels,
const int NDim = 3) { const int NDim = 3) {
at::Tensor voxel_num_tmp = OpPreparation::ApplyTensor(points, {1}); at::Tensor voxel_num_tmp = OpPreparation::ApplyTensor(points, {1});
at::Tensor voxel_num = at_npu::native::NPUNativeFunctions::npu_dtype_cast( at::Tensor voxel_num = voxel_num_tmp.to(at::kInt);
voxel_num_tmp, at::kInt);
at::Tensor voxel_size_cpu = at::from_blob( at::Tensor voxel_size_cpu = at::from_blob(
const_cast<float *>(voxel_size.data()), {3}, dtype(at::kFloat)); const_cast<float *>(voxel_size.data()), {3}, dtype(at::kFloat));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment