OpenDAS / MMCV · Commits

Commit c0268ad9 (unverified)
Authored Sep 05, 2023 by sherie; committed by GitHub on Sep 05, 2023
Parent commit: 6299bc02

Support torch_npu 2.1 (#2909)

Showing 9 changed files with 48 additions and 63 deletions (+48 −63).
mmcv/ops/csrc/common/pytorch_npu_helper.hpp              +1   −1
mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp          +6   −6
mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp             +13  −24
mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp   +3   −2
mmcv/ops/csrc/pytorch/npu/nms_npu.cpp                    +11  −16
mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp            +8   −7
mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp              +1   −1
mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp               +4   −4
mmcv/ops/csrc/pytorch/npu/voxelization_npu.cpp           +1   −2
mmcv/ops/csrc/common/pytorch_npu_helper.hpp

@@ -18,7 +18,7 @@
 #ifndef PYTORCH_NPU_HELPER_HPP_
 #define PYTORCH_NPU_HELPER_HPP_
 
-#include <torch_npu/csrc/aten/NPUNativeFunctions.h>
+#include <torch_npu/csrc/aten/CustomFunctions.h>
 #include <torch_npu/csrc/framework/utils/CalcuOpUtil.h>
 #include <torch_npu/csrc/framework/utils/OpAdapter.h>
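The header swap above is what lets the files below reach NPU custom kernels through the custom_ops namespace instead of NPUNativeFunctions. A minimal sketch of the mapping, assuming a torch_npu 2.1 build environment; the wrapper function name is illustrative and not part of the commit:

#include <ATen/ATen.h>
#include <torch_npu/csrc/aten/CustomFunctions.h>  // replaces NPUNativeFunctions.h

// Hypothetical wrapper: custom NPU kernels are now reached via
// at_npu::native::custom_ops, as the npu_slice_out change in
// focal_loss_npu.cpp below shows.
void slice_out_sketch(const at::Tensor &src, at::IntArrayRef offset,
                      at::IntArrayRef size, at::Tensor &dst) {
  // torch_npu <= 2.0 style (removed by this commit):
  //   at_npu::native::NPUNativeFunctions::npu_slice_out(src, offset, size, dst);
  at_npu::native::custom_ops::npu_slice_out(src, offset, size, dst);
}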
mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp

@@ -20,16 +20,16 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
     bboxesFP32 = bboxes1;
     gtboxesFP32 = bboxes2;
   }
-  if (bboxes2.scalar_type() != at::ScalarType::Float) {
-    bboxesFP32 = NPUNativeFunctions::npu_dtype_cast(bboxesFP32, at::kFloat);
-    gtboxesFP32 = NPUNativeFunctions::npu_dtype_cast(gtboxesFP32, at::kFloat);
+  if (bboxes2.scalar_type() != at::kFloat) {
+    bboxesFP32 = bboxesFP32.to(at::kFloat);
+    gtboxesFP32 = gtboxesFP32.to(at::kFloat);
   }
   c10::SmallVector<int64_t, SIZE> iousSize = {gtboxesFP32.size(0),
                                               bboxesFP32.size(0)};
   if (aligned) {
     iousSize = {gtboxesFP32.size(0), 1};
   }
-  at::Tensor iousFP32 = OpPreparation::ApplyTensor(bboxesFP32, iousSize);
+  at::Tensor iousFP32 = at::empty(iousSize, bboxesFP32.options());
   bboxesFP32 = aligned ? bboxesFP32.transpose(0, 1) : bboxesFP32;
   gtboxesFP32 = aligned ? gtboxesFP32.transpose(0, 1) : gtboxesFP32;
   OpCommand cmd;

@@ -41,8 +41,8 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
       .Attr("eps", (float)offset)
       .Attr("aligned", aligned)
       .Run();
-  if (bboxes2.scalar_type() != at::ScalarType::Float) {
-    iousFP32 = NPUNativeFunctions::npu_dtype_cast(iousFP32, at::kHalf);
+  if (bboxes2.scalar_type() != at::kFloat) {
+    iousFP32 = iousFP32.to(at::kHalf);
   }
   iousFP32 = swap_flag ? iousFP32.transpose(0, 1) : iousFP32;
   ious.copy_(iousFP32);
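The pattern in this file recurs throughout the commit: NPUNativeFunctions::npu_dtype_cast becomes a plain Tensor::to cast, and OpPreparation::ApplyTensor becomes at::empty with the source tensor's options. A minimal sketch using only standard ATen; the helper name is illustrative, not from the commit:

#include <ATen/ATen.h>

// Allocate an FP32 output shaped `out_size`, casting the input first if needed.
at::Tensor empty_fp32_like(const at::Tensor &boxes, at::IntArrayRef out_size) {
  at::Tensor boxes_fp32 = boxes;
  if (boxes.scalar_type() != at::kFloat) {
    boxes_fp32 = boxes_fp32.to(at::kFloat);  // replaces npu_dtype_cast
  }
  // Same device/layout options as the (possibly cast) input, new shape.
  return at::empty(out_size, boxes_fp32.options());  // replaces ApplyTensor
}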
mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp

@@ -12,15 +12,13 @@ void sigmoid_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
     target_y = at::mul(target_y, -1.0);
     target_y = at::add(target_y, 1.0);
   } else {
-    target_y = at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
+    target_y = at::one_hot(target, n_class);
   }
-  target_y =
-      at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
+  target_y = target_y.to(at::kInt);
   int64_t weight_size = weight.size(0);
   at::Tensor weight_y = at::ones_like(input);
   if (weight_size > 0) {
-    weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
-                                                                 input.sizes());
+    weight_y = at::broadcast_to(weight, input.sizes());
   }
   OpCommand cmd;
   string reduction = "none";

@@ -46,18 +44,16 @@ void sigmoid_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight,
   if (n_class == 1) {
     target_y = at::reshape(target, input.sizes());
   } else {
-    target_y = at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
+    target_y = at::one_hot(target, n_class);
     target_y = at::mul(target_y, -1.0);
     target_y = at::add(target_y, 1.0);
   }
-  target_y =
-      at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
+  target_y = target_y.to(at::kInt);
   at::Tensor grad_up = at::ones_like(input);
   int64_t weight_size = weight.size(0);
   at::Tensor weight_y = at::ones_like(input);
   if (weight_size > 0) {
-    weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
-                                                                 input.sizes());
+    weight_y = at::broadcast_to(weight, input.sizes());
   }
   OpCommand cmd;
   string reduction = "none";

@@ -80,15 +76,12 @@ void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
 void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
   int64_t n_class = input.size(1);
-  at::Tensor target_y =
-      at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
-  target_y =
-      at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
+  at::Tensor target_y = at::one_hot(target, n_class);
+  target_y = target_y.to(at::kInt);
   int64_t weight_size = weight.size(0);
   at::Tensor weight_y = at::ones_like(input);
   if (weight_size > 0) {
-    weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
-                                                                 input.sizes());
+    weight_y = at::broadcast_to(weight, input.sizes());
   }
   at::Tensor op_output = at::ones_like(input);
   OpCommand cmd;

@@ -107,8 +100,7 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
   c10::SmallVector<int64_t, 2> sizes = {n_batch, 1};
   at::IntArrayRef offset = at::IntArrayRef(offsets);
   at::IntArrayRef size = at::IntArrayRef(sizes);
-  at_npu::native::NPUNativeFunctions::npu_slice_out(op_output, offset, size,
-                                                    output);
+  at_npu::native::custom_ops::npu_slice_out(op_output, offset, size, output);
 }
 
 void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,

@@ -119,16 +111,13 @@ void softmax_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight,
                                      Tensor buff, Tensor grad_input,
                                      float gamma, float alpha) {
   int64_t n_class = input.size(1);
-  at::Tensor target_y =
-      at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
-  target_y =
-      at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
+  at::Tensor target_y = at::one_hot(target, n_class);
+  target_y = target_y.to(at::kInt);
   at::Tensor grad_up = at::ones_like(input);
   int64_t weight_size = weight.size(0);
   at::Tensor weight_y = at::ones_like(input);
   if (weight_size > 0) {
-    weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
-                                                                 input.sizes());
+    weight_y = at::broadcast_to(weight, input.sizes());
   }
   OpCommand cmd;
   string reduction = "none";
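For the focal-loss kernels, the NPU-native one_hot and npu_broadcast helpers are replaced by their ATen counterparts. A short sketch of the replacement calls, assuming plain libtorch; the helper name is illustrative, not from the commit:

#include <ATen/ATen.h>
#include <utility>

// Build the int32 one-hot target and the broadcast weight, as the rewritten
// kernels do before handing both to OpCommand.
std::pair<at::Tensor, at::Tensor> prepare_targets(const at::Tensor &target,
                                                  const at::Tensor &input,
                                                  const at::Tensor &weight,
                                                  int64_t n_class) {
  at::Tensor target_y = at::one_hot(target, n_class);  // int64 one-hot
  target_y = target_y.to(at::kInt);                    // cast kept from the diff
  at::Tensor weight_y = at::ones_like(input);
  if (weight.size(0) > 0) {
    weight_y = at::broadcast_to(weight, input.sizes());  // replaces npu_broadcast
  }
  return {target_y, weight_y};
}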
mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp

@@ -25,8 +25,9 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
     }
   }
   at::Tensor bias_tmp = at::reshape(bias, input_size_tmp);
-  at::Tensor bias_ = at_npu::native::NPUNativeFunctions::npu_broadcast(
-      bias_tmp, input.sizes());
+  // at::Tensor bias_ = at_npu::native::NPUNativeFunctions::npu_broadcast(
+  //     bias_tmp, input.sizes());
+  at::Tensor bias_ = at::broadcast_to(bias_tmp, input.sizes());
   OpCommand cmd;
   cmd.Name("FusedBiasLeakyRelu")
       .Input(input)
mmcv/ops/csrc/pytorch/npu/nms_npu.cpp

@@ -7,20 +7,16 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
   TORCH_CHECK((boxes.scalar_type() == at::ScalarType::Float),
               "The type of boxes tensor passed in nms_npu should be float");
   int64_t offset_64 = offset;
-  at::Tensor iou_threshold_y = at_npu::native::OpPreparation::ApplyTensor(
-                                   {}, boxes.options().dtype(at::kFloat), boxes)
-                                   .fill_(iou_threshold);
+  at::Tensor iou_threshold_y =
+      at::empty({}, boxes.options().dtype(at::kFloat)).fill_(iou_threshold);
   at::Tensor scores_threshold_y =
-      at_npu::native::OpPreparation::ApplyTensor(
-          {}, boxes.options().dtype(at::kFloat), boxes)
-          .fill_(0);
-  at::Tensor max_outputsize_y = at_npu::native::OpPreparation::ApplyTensor(
-                                    {}, boxes.options().dtype(at::kInt), boxes)
-                                    .fill_(boxes.size(0));
+      at::empty({}, boxes.options().dtype(at::kFloat)).fill_(0);
+  at::Tensor max_outputsize_y =
+      at::empty({}, boxes.options().dtype(at::kInt)).fill_(boxes.size(0));
   c10::SmallVector<int64_t, SIZE> outputsize = {boxes.size(0)};
-  at::Tensor output = at_npu::native::OpPreparation::ApplyTensor(
-                          outputsize, boxes.options().dtype(at::kInt), boxes)
-                          .fill_(-1);
+  at::Tensor output =
+      at::empty(outputsize, boxes.options().dtype(at::kInt)).fill_(-1);
   OpCommand cmd;
   cmd.Name("NonMaxSuppressionV3")
       .Input(boxes)

@@ -32,11 +28,10 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
       .Output(output)
       .Run();
   auto outputsizeBool = at::gt(output, -1);
-  auto outputsizeInt = outputsizeBool.to(at::ScalarType::Int);
-  auto countLen = at::sum(outputsizeInt, at::ScalarType::Int);
+  auto outputsizeInt = outputsizeBool.to(at::kInt);
+  auto countLen = at::sum(outputsizeInt, at::kInt);
   at::Tensor actual_output = output.slice(0, 0, countLen.item().toLong());
-  actual_output = at_npu::native::NPUNativeFunctions::npu_dtype_cast(
-      actual_output, at::kLong);
+  actual_output = actual_output.to(at::kLong);
   return actual_output;
 }
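The scalar attribute tensors that OpPreparation::ApplyTensor({}, ...) used to produce are now created with at::empty({}, options).fill_(...). A sketch of that allocation pattern with plain libtorch; the function name is illustrative:

#include <ATen/ATen.h>

// 0-dim float tensor with the same device/layout options as `boxes`,
// holding the IoU threshold fed to the NonMaxSuppressionV3 op above.
at::Tensor iou_threshold_tensor(const at::Tensor &boxes, float iou_threshold) {
  return at::empty({}, boxes.options().dtype(at::kFloat)).fill_(iou_threshold);
}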
mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp

@@ -7,14 +7,15 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
   auto originDtype = dets.scalar_type();
   at::Tensor detsCast = dets;
   at::Tensor scoresCast = scores;
-  if (originDtype != at::ScalarType::Float) {
-    detsCast = NPUNativeFunctions::npu_dtype_cast(dets, at::kFloat);
-    scoresCast = NPUNativeFunctions::npu_dtype_cast(scores, at::kFloat);
+  if (originDtype != at::kFloat) {
+    detsCast = detsCast.to(at::kFloat);
+    scoresCast = scoresCast.to(at::kFloat);
   }
   c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)};
-  at::Tensor selectedBox = OpPreparation::ApplyTensor(dets);
-  at::Tensor selectedIndex = OpPreparation::ApplyTensor(
-      selectedIndexSize, dets.options().dtype(at::kInt), dets);
+  at::Tensor selectedBox = at::empty_like(dets);
+  at::Tensor selectedIndex =
+      at::empty(selectedIndexSize, dets.options().dtype(at::kInt));
   c10::SmallVector<int64_t, N> output_sync_idx = {0, 1};
   OpCommand cmd;

@@ -27,6 +28,6 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
       .Output(selectedIndex)
       .Attr("iou_threshold", (float)iou_threshold)
       .Run();
-  selectedIndex = NPUNativeFunctions::npu_dtype_cast(selectedIndex, at::kLong);
+  selectedIndex = selectedIndex.to(at::kLong);
   return selectedIndex;
 }
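Output buffers that were previously allocated with OpPreparation::ApplyTensor now come from at::empty_like / at::empty. A sketch, assuming plain libtorch; the function name is illustrative:

#include <ATen/ATen.h>

// Allocate the two outputs of the rotated-NMS kernel: boxes shaped like `dets`,
// and an int32 index buffer of length dets.size(0).
void allocate_nms_rotated_outputs(const at::Tensor &dets,
                                  at::Tensor &selected_box,
                                  at::Tensor &selected_index) {
  selected_box = at::empty_like(dets);  // same shape, dtype, device as dets
  selected_index = at::empty({dets.size(0)}, dets.options().dtype(at::kInt));
}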
mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp

@@ -42,7 +42,7 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
     roi_end_mode = 0;
   }
   c10::SmallVector<int64_t, SIZE> xdiff_shape =
-      at_npu::native::array_to_small_vector(grad_input.sizes());
+      array_to_small_vector(grad_input.sizes());
   OpCommand cmd;
   cmd.Name("ROIAlignGrad")
       .Input(grad_output)
mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp

@@ -9,8 +9,8 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
   int64_t pooled_height_64 = pooled_height;
   int64_t pooled_width_64 = pooled_width;
   int64_t pooled_channel = 1;
-  at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
-      {}, rois.options().dtype(at::kInt), rois);
+  at::Tensor roi_actual_num =
+      at::empty_like(rois, rois.options().dtype(at::kInt));
   if (input.sizes()[1] % 16 == 0) {
     OpCommand cmd;
     cmd.Name("RoiPoolingWithArgMax")

@@ -50,8 +50,8 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
   int64_t pooled_height_64 = pooled_height;
   int64_t pooled_width_64 = pooled_width;
   int64_t pooled_channel = 1;
-  at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
-      {}, rois.options().dtype(at::kInt), rois);
+  at::Tensor roi_actual_num =
+      at::empty_like(rois, rois.options().dtype(at::kInt));
   at::Tensor x = at::ones_like(grad_input);
   OpCommand cmd;
   cmd.Name("RoiPoolingGradWithArgMax")
mmcv/ops/csrc/pytorch/npu/voxelization_npu.cpp

@@ -19,8 +19,7 @@ int hard_voxelize_forward_npu(const at::Tensor &points, at::Tensor &voxels,
                               const int max_points, const int max_voxels,
                               const int NDim = 3) {
   at::Tensor voxel_num_tmp = OpPreparation::ApplyTensor(points, {1});
-  at::Tensor voxel_num = at_npu::native::NPUNativeFunctions::npu_dtype_cast(
-      voxel_num_tmp, at::kInt);
+  at::Tensor voxel_num = voxel_num_tmp.to(at::kInt);
   at::Tensor voxel_size_cpu = at::from_blob(
       const_cast<float *>(voxel_size.data()), {3}, dtype(at::kFloat));