OpenDAS / MMCV / Commits / a4dc2a72

Commit a4dc2a72 (unverified), authored Dec 24, 2021 by pc, committed by GitHub on Dec 24, 2021

support device dispatch in parrots (#1588)

parent 0bcbeadb · Changes 46

Showing 20 changed files with 1830 additions and 1292 deletions (+1830 -1292)
mmcv/ops/csrc/parrots/assign_score_withk.cpp       +15   -58
mmcv/ops/csrc/parrots/ball_query.cpp                +7   -24
mmcv/ops/csrc/parrots/bbox_overlaps.cpp             +5   -21
mmcv/ops/csrc/parrots/border_align.cpp             +10   -48
mmcv/ops/csrc/parrots/box_iou_rotated.cpp           +7   -17
mmcv/ops/csrc/parrots/box_iou_rotated_cpu.cpp       +0   -33
mmcv/ops/csrc/parrots/carafe.cpp                   +14   -60
mmcv/ops/csrc/parrots/carafe_naive.cpp             +12   -49
mmcv/ops/csrc/parrots/correlation.cpp              +17   -57
mmcv/ops/csrc/parrots/cudabind.cpp               +1364    -0
mmcv/ops/csrc/parrots/deform_conv.cpp              +50   -91
mmcv/ops/csrc/parrots/deform_conv_cpu.cpp           +0  -377
mmcv/ops/csrc/parrots/deform_roi_pool.cpp          +15   -55
mmcv/ops/csrc/parrots/focal_loss.cpp               +19   -97
mmcv/ops/csrc/parrots/furthest_point_sample.cpp    +16   -44
mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp    +112   -20
mmcv/ops/csrc/parrots/gather_points.cpp            +13   -38
mmcv/ops/csrc/parrots/group_points.cpp             +13   -37
mmcv/ops/csrc/parrots/info.cpp                     +56    -0
mmcv/ops/csrc/parrots/iou3d.cpp                    +85  -166
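All 20 files follow one refactor: the per-op CUDA declarations and the if (tensor.device().is_cuda()) branches are replaced by a device registry, so each operator exposes a single *_impl entry that is resolved to a per-device implementation at runtime. The following is a minimal, self-contained sketch of that registry idea, not the actual pytorch_device_registry.hpp added by this commit; DeviceRegistry, DeviceType, OverlapsFn and the two stub kernels are illustrative names.

#include <functional>
#include <map>
#include <stdexcept>

enum class DeviceType { kCPU, kCUDA };

// One registry per operator signature: device tag -> implementation.
template <typename Fn>
class DeviceRegistry {
 public:
  static DeviceRegistry& instance() {
    static DeviceRegistry reg;
    return reg;
  }
  void Register(DeviceType dev, Fn fn) { table_[dev] = fn; }
  Fn Find(DeviceType dev) const {
    auto it = table_.find(dev);
    if (it == table_.end())
      throw std::runtime_error("no implementation registered for this device");
    return it->second;
  }

 private:
  std::map<DeviceType, Fn> table_;
};

// Hypothetical op signature standing in for e.g. bbox_overlaps_impl.
using OverlapsFn = std::function<void(int /*mode*/, bool /*aligned*/)>;

void bbox_overlaps_cpu_stub(int, bool) { /* CPU kernel would run here */ }
void bbox_overlaps_cuda_stub(int, bool) { /* CUDA launcher would run here */ }

int main() {
  auto& reg = DeviceRegistry<OverlapsFn>::instance();
  reg.Register(DeviceType::kCPU, bbox_overlaps_cpu_stub);
  reg.Register(DeviceType::kCUDA, bbox_overlaps_cuda_stub);
  // A real dispatcher reads the device from the Tensor arguments; here the
  // tag is passed explicitly.
  reg.Find(DeviceType::kCUDA)(/*mode=*/0, /*aligned=*/false);
  return 0;
}

In the real code the lookup sits behind a DISPATCH_DEVICE_IMPL macro, and the per-device bindings are collected in files such as the new cudabind.cpp below.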
mmcv/ops/csrc/parrots/assign_score_withk.cpp   (view file @ a4dc2a72, +15 -58)

Before this commit the file declared AssignScoreWithKForwardCUDAKernelLauncher / AssignScoreWithKBackwardCUDAKernelLauncher and thin assign_score_withk_forward_cuda / assign_score_withk_backward_cuda wrappers inside #ifdef MMCV_WITH_CUDA; the entry points branched on the tensors' device, ran CHECK_CONTIGUOUS on every tensor, and otherwise raised AT_ERROR("assign_score_withk is not compiled with GPU support") or AT_ERROR("assign_score_withk is not implemented on CPU"). After the change the file is reduced to device-agnostic dispatch:

// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K,
                                     int O, int aggregate,
                                     const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O,
                       aggregate, points, centers, scores, knn_idx, output);
}

void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O,
                       aggregate, grad_out, points, centers, scores, knn_idx,
                       grad_points, grad_centers, grad_scores);
}

void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
                                const Tensor& scores, const Tensor& knn_idx,
                                Tensor& output, int B, int N0, int N1, int M,
                                int K, int O, int aggregate) {
  assign_score_withk_forward_impl(B, N0, N1, M, K, O, aggregate, points,
                                  centers, scores, knn_idx, output);
}

@@ -62,24 +36,7 @@ void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
                                 Tensor& grad_centers, Tensor& grad_scores,
                                 int B, int N0, int N1, int M, int K, int O,
                                 int aggregate) {
  assign_score_withk_backward_impl(B, N0, N1, M, K, O, aggregate, grad_out,
                                   points, centers, scores, knn_idx,
                                   grad_points, grad_centers, grad_scores);
}
mmcv/ops/csrc/parrots/ball_query.cpp   (view file @ a4dc2a72, +7 -24)

Same refactor: the BallQueryForwardCUDAKernelLauncher declaration and the ball_query_forward_cuda wrapper inside #ifdef MMCV_WITH_CUDA are dropped, together with the device check, the CHECK_CUDA_INPUT calls and the AT_ERROR("ball_query is not compiled with GPU support" / "ball_query is not implemented on CPU") branches.

@@ -2,36 +2,19 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius,
                       max_radius, nsample, new_xyz, xyz, idx);
}

void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
                        Tensor idx_tensor, int b, int n, int m,
                        float min_radius, float max_radius, int nsample) {
  ball_query_forward_impl(b, n, m, min_radius, max_radius, nsample,
                          new_xyz_tensor, xyz_tensor, idx_tensor);
}
mmcv/ops/csrc/parrots/bbox_overlaps.cpp   (view file @ a4dc2a72, +5 -21)

The BBoxOverlapsCUDAKernelLauncher declaration, the bbox_overlaps_cuda wrapper, the CHECK_CUDA_INPUT calls and the AT_ERROR("bbox_overlaps is not compiled with GPU support" / "bbox_overlaps is not implemented on CPU") branches are removed; the file now reads:

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset) {
  DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode,
                       aligned, offset);
}

void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                   const int mode, const bool aligned, const int offset) {
  bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset);
}
mmcv/ops/csrc/parrots/border_align.cpp   (view file @ a4dc2a72, +10 -48)

Same pattern. border_align_forward_impl(const Tensor& input, const Tensor& boxes, Tensor output, Tensor argmax_idx, const int pool_size) and border_align_backward_impl(const Tensor& grad_output, const Tensor& boxes, const Tensor& argmax_idx, Tensor grad_input, const int pool_size) now forward to DISPATCH_DEVICE_IMPL, and border_align_forward / border_align_backward call them directly.

Removed: the BorderAlignForwardCUDAKernelLauncher / BorderAlignBackwardCUDAKernelLauncher declarations, the border_align_forward_cuda / border_align_backward_cuda wrappers, the is_cuda() checks, the CHECK_CUDA_INPUT calls, and the AT_ERROR("BorderAlign is not compiled with GPU support" / "BorderAlign is not implemented on CPU") branches.
mmcv/ops/csrc/parrots/box_iou_rotated.cpp   (view file @ a4dc2a72, +7 -17)

The separate box_iou_rotated_cpu / box_iou_rotated_cuda declarations and the per-device branch in the Python-facing entry are replaced by a single dispatched implementation:

@@ -2,28 +2,18 @@
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned) {
  DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag,
                       aligned);
}

// Interface for Python
// inline is needed to prevent multiple function definitions when this header is
// included by different cpps
void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                     const int mode_flag, const bool aligned) {
  assert(boxes1.device().is_cuda() == boxes2.device().is_cuda());
  box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned);
}

The old AT_ERROR("Not compiled with GPU support") branch and the explicit fallback call to box_iou_rotated_cpu are gone.
mmcv/ops/csrc/parrots/box_iou_rotated_cpu.cpp   (deleted, 100644 → 0; view file @ 0bcbeadb, -33)

The parrots copy of the CPU kernel is removed. The deleted file contained:

// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp
#include "box_iou_rotated_utils.hpp"
#include "pytorch_cpp_helper.hpp"

template <typename T>
void box_iou_rotated_cpu_kernel(const Tensor boxes1, const Tensor boxes2,
                                Tensor ious, const int mode_flag,
                                const bool aligned) {
  int output_size = ious.numel();
  auto num_boxes1 = boxes1.size(0);
  auto num_boxes2 = boxes2.size(0);

  if (aligned) {
    for (int i = 0; i < output_size; i++) {
      ious[i] = single_box_iou_rotated<T>(boxes1[i].data_ptr<T>(),
                                          boxes2[i].data_ptr<T>(), mode_flag);
    }
  } else {
    for (int i = 0; i < num_boxes1; i++) {
      for (int j = 0; j < num_boxes2; j++) {
        ious[i * num_boxes2 + j] = single_box_iou_rotated<T>(
            boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>(), mode_flag);
      }
    }
  }
}

void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                         const int mode_flag, const bool aligned) {
  box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious, mode_flag, aligned);
}
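Removing the parrots CPU file only makes sense if the CPU kernel stays reachable through the new registry. A hedged sketch of that binding, assuming a REGISTER_DEVICE_IMPL(key, DEVICE, fn) macro in pytorch_device_registry.hpp and without claiming which source file in this commit actually hosts the registration:

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch key (defined in box_iou_rotated.cpp above) and the CPU kernel;
// declarations only, the definitions live elsewhere.
void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);
void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                         const int mode_flag, const bool aligned);

// Bind the CPU kernel to the dispatch key, so box_iou_rotated_impl(...)
// resolves to it for CPU tensors.
REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CPU, box_iou_rotated_cpu);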
mmcv/ops/csrc/parrots/carafe.cpp   (view file @ a4dc2a72, +14 -60)

Same pattern. carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures, Tensor routput, Tensor rmasks, Tensor output, int kernel_size, int group_size, int scale_factor) and carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks, Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) forward to DISPATCH_DEVICE_IMPL, and carafe_forward / carafe_backward (hunk @@ -61,24 +32,7 @@) call them directly.

Removed: the CARAFEForwardCUDAKernelLauncher / CARAFEBackwardCUDAKernelLauncher declarations, the carafe_forward_cuda / carafe_backward_cuda wrappers, the CHECK_CUDA_INPUT calls on every tensor, and the AT_ERROR("Carafe is not compiled with GPU support" / "Carafe is not implemented on CPU") branches.
mmcv/ops/csrc/parrots/carafe_naive.cpp   (view file @ a4dc2a72, +12 -49)

Same pattern. carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output, int kernel_size, int group_size, int scale_factor) and carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, int scale_factor) dispatch through DISPATCH_DEVICE_IMPL; carafe_naive_forward / carafe_naive_backward call them unconditionally.

Removed: the CARAFENAIVEForwardCUDAKernelLauncher / CARAFENAIVEBackwardCUDAKernelLauncher declarations, the carafe_naive_forward_cuda / carafe_naive_backward_cuda wrappers, the CHECK_CUDA_INPUT calls, and the AT_ERROR("CarafeNaive is not compiled with GPU support" / "CarafeNaive is not implemented on CPU") branches.
mmcv/ops/csrc/parrots/correlation.cpp   (view file @ a4dc2a72, +17 -57; hunks @@ -2,65 +2,37 @@ and @@ -68,20 +40,8 @@)

Same pattern. correlation_forward_impl(Tensor input1, Tensor input2, Tensor output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) and correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1, Tensor grad_input2, plus the same geometry arguments) forward to DISPATCH_DEVICE_IMPL, and correlation_forward / correlation_backward call them directly.

Removed: the CorrelationForwardCUDAKernelLauncher / CorrelationBackwardCUDAKernelLauncher declarations, the correlation_cuda_forward / correlation_cuda_backward wrappers, the input1/input2 is_cuda() checks, the CHECK_CUDA_INPUT calls, and the AT_ERROR("Correlation is not compiled with GPU support" / "Correlation is not implemented on CPU") branches.
mmcv/ops/csrc/parrots/cudabind.cpp   (new file, 0 → 100644; view file @ a4dc2a72, +1364 -0)

This diff is collapsed on the page; the 1364 added lines are not shown.
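Judging from the per-file diffs, the CUDA-launcher declarations and *_cuda wrappers that disappeared from each operator file are collected in this new cudabind.cpp and bound to the registry for the CUDA device. A hedged sketch of what one such entry likely looks like, using ball_query as the example and assuming a REGISTER_DEVICE_IMPL macro in pytorch_device_registry.hpp; the actual contents of the 1364 added lines are not visible on this page:

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// CUDA kernel launcher, implemented in the .cu sources.
void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
                                        float max_radius, int nsample,
                                        const Tensor new_xyz, const Tensor xyz,
                                        Tensor idx);

// Thin wrapper matching the dispatch key's signature.
void ball_query_forward_cuda(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  BallQueryForwardCUDAKernelLauncher(b, n, m, min_radius, max_radius, nsample,
                                     new_xyz, xyz, idx);
}

// Dispatch key declared in ball_query.cpp; registering the wrapper makes
// DISPATCH_DEVICE_IMPL(ball_query_forward_impl, ...) pick it for CUDA tensors.
void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz, Tensor idx);
REGISTER_DEVICE_IMPL(ball_query_forward_impl, CUDA, ball_query_forward_cuda);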
mmcv/ops/csrc/parrots/deform_conv.cpp   (view file @ a4dc2a72, +50 -91)

The top of the file replaces the #ifdef MMCV_WITH_CUDA declarations of deformable_im2col / deformable_col2im / deformable_col2im_coord and the deformable_*_cpu declarations with dispatched implementations (deform_conv_shape_check is unchanged):

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col) {
  DISPATCH_DEVICE_IMPL(deformable_im2col_impl, data_im, data_offset, channels,
                       height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
                       stride_w, dilation_h, dilation_w, parallel_imgs,
                       deformable_group, data_col);
}

deformable_col2im_impl (data_col, data_offset, the same geometry arguments, Tensor grad_im) and deformable_col2im_coord_impl (data_col, data_im, data_offset, the same geometry arguments, Tensor grad_offset) are defined the same way.

In the convolution bodies the per-device branches disappear:

@@ -227,17 +216,9 @@ void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);

    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    weight = weight.view({group, weight.size(0) / group, weight.size(1), ...

@@ -373,29 +354,15 @@ void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
    deformable_col2im_coord_impl(columns, input[elt], offset[elt], nInputPlane,
                                 inputHeight, inputWidth, kH, kW, padH, padW,
                                 dH, dW, dilationH, dilationW, im2col_step,
                                 deformable_group, gradOffset[elt]);
    deformable_col2im_impl(columns, offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group,
                           gradInput[elt]);

@@ -508,17 +475,9 @@ void deform_conv_backward_parameters(Tensor input, Tensor offset,
  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);
    // divide into group
    gradOutputBuffer = gradOutputBuffer.view( ...

In each hunk the old if (input.device().is_cuda()) { #ifdef MMCV_WITH_CUDA deformable_im2col(...) / deformable_col2im_coord(...) / deformable_col2im(...) #endif } else { deformable_*_cpu(...) } branches collapse into the single _impl call shown above.
deleted
100644 → 0
View file @
0bcbeadb
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
template
<
typename
T
>
T
deformable_im2col_bilinear_cpu
(
const
T
*
input
,
const
int
data_width
,
const
int
height
,
const
int
width
,
T
h
,
T
w
)
{
if
(
h
<=
-
1
||
height
<=
h
||
w
<=
-
1
||
width
<=
w
)
{
return
0
;
}
int
h_low
=
floor
(
h
);
int
w_low
=
floor
(
w
);
int
h_high
=
h_low
+
1
;
int
w_high
=
w_low
+
1
;
T
lh
=
h
-
h_low
;
T
lw
=
w
-
w_low
;
T
hh
=
1
-
lh
,
hw
=
1
-
lw
;
T
v1
=
0
;
if
(
h_low
>=
0
&&
w_low
>=
0
)
v1
=
input
[
h_low
*
data_width
+
w_low
];
T
v2
=
0
;
if
(
h_low
>=
0
&&
w_high
<=
width
-
1
)
v2
=
input
[
h_low
*
data_width
+
w_high
];
T
v3
=
0
;
if
(
h_high
<=
height
-
1
&&
w_low
>=
0
)
v3
=
input
[
h_high
*
data_width
+
w_low
];
T
v4
=
0
;
if
(
h_high
<=
height
-
1
&&
w_high
<=
width
-
1
)
v4
=
input
[
h_high
*
data_width
+
w_high
];
T
w1
=
hh
*
hw
,
w2
=
hh
*
lw
,
w3
=
lh
*
hw
,
w4
=
lh
*
lw
;
T
val
=
(
w1
*
v1
+
w2
*
v2
+
w3
*
v3
+
w4
*
v4
);
return
val
;
}
template
<
typename
T
>
T
get_gradient_weight_cpu
(
T
argmax_h
,
T
argmax_w
,
const
int
h
,
const
int
w
,
const
int
height
,
const
int
width
)
{
if
(
argmax_h
<=
-
1
||
argmax_h
>=
height
||
argmax_w
<=
-
1
||
argmax_w
>=
width
)
{
// empty
return
0
;
}
int
argmax_h_low
=
floor
(
argmax_h
);
int
argmax_w_low
=
floor
(
argmax_w
);
int
argmax_h_high
=
argmax_h_low
+
1
;
int
argmax_w_high
=
argmax_w_low
+
1
;
T
weight
=
0
;
if
(
h
==
argmax_h_low
&&
w
==
argmax_w_low
)
weight
=
(
h
+
1
-
argmax_h
)
*
(
w
+
1
-
argmax_w
);
if
(
h
==
argmax_h_low
&&
w
==
argmax_w_high
)
weight
=
(
h
+
1
-
argmax_h
)
*
(
argmax_w
+
1
-
w
);
if
(
h
==
argmax_h_high
&&
w
==
argmax_w_low
)
weight
=
(
argmax_h
+
1
-
h
)
*
(
w
+
1
-
argmax_w
);
if
(
h
==
argmax_h_high
&&
w
==
argmax_w_high
)
weight
=
(
argmax_h
+
1
-
h
)
*
(
argmax_w
+
1
-
w
);
return
weight
;
}
template
<
typename
T
>
T
get_coordinate_weight_cpu
(
T
argmax_h
,
T
argmax_w
,
const
int
height
,
const
int
width
,
const
T
*
im_data
,
const
int
data_width
,
const
int
bp_dir
)
{
if
(
argmax_h
<=
-
1
||
argmax_h
>=
height
||
argmax_w
<=
-
1
||
argmax_w
>=
width
)
{
// empty
return
0
;
}
int
argmax_h_low
=
floor
(
argmax_h
);
int
argmax_w_low
=
floor
(
argmax_w
);
int
argmax_h_high
=
argmax_h_low
+
1
;
int
argmax_w_high
=
argmax_w_low
+
1
;
T
weight
=
0
;
if
(
bp_dir
==
0
)
{
if
(
argmax_h_low
>=
0
&&
argmax_w_low
>=
0
)
weight
+=
-
1
*
(
argmax_w_low
+
1
-
argmax_w
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_low
];
if
(
argmax_h_low
>=
0
&&
argmax_w_high
<=
width
-
1
)
weight
+=
-
1
*
(
argmax_w
-
argmax_w_low
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_high
];
if
(
argmax_h_high
<=
height
-
1
&&
argmax_w_low
>=
0
)
weight
+=
(
argmax_w_low
+
1
-
argmax_w
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_low
];
if
(
argmax_h_high
<=
height
-
1
&&
argmax_w_high
<=
width
-
1
)
weight
+=
(
argmax_w
-
argmax_w_low
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_high
];
}
else
if
(
bp_dir
==
1
)
{
if
(
argmax_h_low
>=
0
&&
argmax_w_low
>=
0
)
weight
+=
-
1
*
(
argmax_h_low
+
1
-
argmax_h
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_low
];
if
(
argmax_h_low
>=
0
&&
argmax_w_high
<=
width
-
1
)
weight
+=
(
argmax_h_low
+
1
-
argmax_h
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_high
];
if
(
argmax_h_high
<=
height
-
1
&&
argmax_w_low
>=
0
)
weight
+=
-
1
*
(
argmax_h
-
argmax_h_low
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_low
];
if
(
argmax_h_high
<=
height
-
1
&&
argmax_w_high
<=
width
-
1
)
weight
+=
(
argmax_h
-
argmax_h_low
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_high
];
}
return
weight
;
}
template
<
typename
T
>
void
deformable_im2col_cpu_kernel
(
const
int
n
,
const
T
*
data_im
,
const
T
*
data_offset
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
channel_per_deformable_group
,
const
int
batch_size
,
const
int
num_channels
,
const
int
deformable_group
,
const
int
height_col
,
const
int
width_col
,
T
*
data_col
)
{
for
(
int
index
=
0
;
index
<
n
;
index
++
)
{
// index index of output matrix
const
int
w_col
=
index
%
width_col
;
const
int
h_col
=
(
index
/
width_col
)
%
height_col
;
const
int
b_col
=
(
index
/
width_col
/
height_col
)
%
batch_size
;
const
int
c_im
=
(
index
/
width_col
/
height_col
)
/
batch_size
;
const
int
c_col
=
c_im
*
kernel_h
*
kernel_w
;
// compute deformable group index
const
int
deformable_group_index
=
c_im
/
channel_per_deformable_group
;
const
int
h_in
=
h_col
*
stride_h
-
pad_h
;
const
int
w_in
=
w_col
*
stride_w
-
pad_w
;
T
*
data_col_ptr
=
data_col
+
((
c_col
*
batch_size
+
b_col
)
*
height_col
+
h_col
)
*
width_col
+
w_col
;
const
T
*
data_im_ptr
=
data_im
+
(
b_col
*
num_channels
+
c_im
)
*
height
*
width
;
const
T
*
data_offset_ptr
=
data_offset
+
(
b_col
*
deformable_group
+
deformable_group_index
)
*
2
*
kernel_h
*
kernel_w
*
height_col
*
width_col
;
for
(
int
i
=
0
;
i
<
kernel_h
;
++
i
)
{
for
(
int
j
=
0
;
j
<
kernel_w
;
++
j
)
{
const
int
data_offset_h_ptr
=
((
2
*
(
i
*
kernel_w
+
j
))
*
height_col
+
h_col
)
*
width_col
+
w_col
;
const
int
data_offset_w_ptr
=
((
2
*
(
i
*
kernel_w
+
j
)
+
1
)
*
height_col
+
h_col
)
*
width_col
+
w_col
;
const
T
offset_h
=
data_offset_ptr
[
data_offset_h_ptr
];
const
T
offset_w
=
data_offset_ptr
[
data_offset_w_ptr
];
T
val
=
static_cast
<
T
>
(
0
);
const
T
h_im
=
h_in
+
i
*
dilation_h
+
offset_h
;
const
T
w_im
=
w_in
+
j
*
dilation_w
+
offset_w
;
if
(
h_im
>
-
1
&&
w_im
>
-
1
&&
h_im
<
height
&&
w_im
<
width
)
val
=
deformable_im2col_bilinear_cpu
(
data_im_ptr
,
width
,
height
,
width
,
h_im
,
w_im
);
*
data_col_ptr
=
val
;
data_col_ptr
+=
batch_size
*
height_col
*
width_col
;
}
}
}
}
template
<
typename
T
>
void
deformable_col2im_cpu_kernel
(
const
int
n
,
const
T
*
data_col
,
const
T
*
data_offset
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
channel_per_deformable_group
,
const
int
batch_size
,
const
int
deformable_group
,
const
int
height_col
,
const
int
width_col
,
T
*
grad_im
)
{
for
(
int
index
=
0
;
index
<
n
;
index
++
)
{
const
int
j
=
(
index
/
width_col
/
height_col
/
batch_size
)
%
kernel_w
;
const
int
i
=
(
index
/
width_col
/
height_col
/
batch_size
/
kernel_w
)
%
kernel_h
;
const
int
c
=
index
/
width_col
/
height_col
/
batch_size
/
kernel_w
/
kernel_h
;
// compute the start and end of the output
const
int
deformable_group_index
=
c
/
channel_per_deformable_group
;
int
w_out
=
index
%
width_col
;
int
h_out
=
(
index
/
width_col
)
%
height_col
;
int
b
=
(
index
/
width_col
/
height_col
)
%
batch_size
;
int
w_in
=
w_out
*
stride_w
-
pad_w
;
int
h_in
=
h_out
*
stride_h
-
pad_h
;
const
T
*
data_offset_ptr
=
data_offset
+
(
b
*
deformable_group
+
deformable_group_index
)
*
2
*
kernel_h
*
kernel_w
*
height_col
*
width_col
;
const
int
data_offset_h_ptr
=
((
2
*
(
i
*
kernel_w
+
j
))
*
height_col
+
h_out
)
*
width_col
+
w_out
;
const
int
data_offset_w_ptr
=
((
2
*
(
i
*
kernel_w
+
j
)
+
1
)
*
height_col
+
h_out
)
*
width_col
+
w_out
;
const
T
offset_h
=
data_offset_ptr
[
data_offset_h_ptr
];
const
T
offset_w
=
data_offset_ptr
[
data_offset_w_ptr
];
const
T
cur_inv_h_data
=
h_in
+
i
*
dilation_h
+
offset_h
;
const
T
cur_inv_w_data
=
w_in
+
j
*
dilation_w
+
offset_w
;
const
T
cur_top_grad
=
data_col
[
index
];
const
int
cur_h
=
(
int
)
cur_inv_h_data
;
const
int
cur_w
=
(
int
)
cur_inv_w_data
;
for
(
int
dy
=
-
2
;
dy
<=
2
;
dy
++
)
{
for
(
int
dx
=
-
2
;
dx
<=
2
;
dx
++
)
{
if
(
cur_h
+
dy
>=
0
&&
cur_h
+
dy
<
height
&&
cur_w
+
dx
>=
0
&&
cur_w
+
dx
<
width
&&
abs
(
cur_inv_h_data
-
(
cur_h
+
dy
))
<
1
&&
abs
(
cur_inv_w_data
-
(
cur_w
+
dx
))
<
1
)
{
int
cur_bottom_grad_pos
=
((
b
*
channels
+
c
)
*
height
+
cur_h
+
dy
)
*
width
+
cur_w
+
dx
;
T
weight
=
get_gradient_weight_cpu
(
cur_inv_h_data
,
cur_inv_w_data
,
cur_h
+
dy
,
cur_w
+
dx
,
height
,
width
);
*
(
grad_im
+
cur_bottom_grad_pos
)
+=
weight
*
cur_top_grad
;
}
}
}
}
}
template
<
typename
T
>
void
deformable_col2im_coord_cpu_kernel
(
const
int
n
,
const
T
*
data_col
,
const
T
*
data_im
,
const
T
*
data_offset
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
channel_per_deformable_group
,
const
int
batch_size
,
const
int
offset_channels
,
const
int
deformable_group
,
const
int
height_col
,
const
int
width_col
,
T
*
grad_offset
)
{
for
(
int
index
=
0
;
index
<
n
;
index
++
)
{
T
val
=
0
;
int
w
=
index
%
width_col
;
int
h
=
(
index
/
width_col
)
%
height_col
;
int
c
=
(
index
/
width_col
/
height_col
)
%
offset_channels
;
int
b
=
(
index
/
width_col
/
height_col
)
/
offset_channels
;
// compute the start and end of the output
const
int
deformable_group_index
=
c
/
(
2
*
kernel_h
*
kernel_w
);
const
int
col_step
=
kernel_h
*
kernel_w
;
int
cnt
=
0
;
const
T
*
data_col_ptr
=
data_col
+
deformable_group_index
*
channel_per_deformable_group
*
batch_size
*
width_col
*
height_col
;
const
T
*
data_im_ptr
=
data_im
+
(
b
*
deformable_group
+
deformable_group_index
)
*
channel_per_deformable_group
/
kernel_h
/
kernel_w
*
height
*
width
;
const
T
*
data_offset_ptr
=
data_offset
+
(
b
*
deformable_group
+
deformable_group_index
)
*
2
*
kernel_h
*
kernel_w
*
height_col
*
width_col
;
const
int
offset_c
=
c
-
deformable_group_index
*
2
*
kernel_h
*
kernel_w
;
for
(
int
col_c
=
(
offset_c
/
2
);
col_c
<
channel_per_deformable_group
;
col_c
+=
col_step
)
{
const
int
col_pos
=
(((
col_c
*
batch_size
+
b
)
*
height_col
)
+
h
)
*
width_col
+
w
;
const
int
bp_dir
=
offset_c
%
2
;
int
j
=
(
col_pos
/
width_col
/
height_col
/
batch_size
)
%
kernel_w
;
int
i
=
(
col_pos
/
width_col
/
height_col
/
batch_size
/
kernel_w
)
%
kernel_h
;
int
w_out
=
col_pos
%
width_col
;
int
h_out
=
(
col_pos
/
width_col
)
%
height_col
;
int
w_in
=
w_out
*
stride_w
-
pad_w
;
int
h_in
=
h_out
*
stride_h
-
pad_h
;
const
int
data_offset_h_ptr
=
(((
2
*
(
i
*
kernel_w
+
j
))
*
height_col
+
h_out
)
*
width_col
+
w_out
);
const
int
data_offset_w_ptr
=
(((
2
*
(
i
*
kernel_w
+
j
)
+
1
)
*
height_col
+
h_out
)
*
width_col
+
w_out
);
const
T
offset_h
=
data_offset_ptr
[
data_offset_h_ptr
];
const
T
offset_w
=
data_offset_ptr
[
data_offset_w_ptr
];
T
inv_h
=
h_in
+
i
*
dilation_h
+
offset_h
;
T
inv_w
=
w_in
+
j
*
dilation_w
+
offset_w
;
if
(
inv_h
<=
-
1
||
inv_w
<=
-
1
||
inv_h
>=
height
||
inv_w
>=
width
)
inv_h
=
inv_w
=
-
2
;
const
T
weight
=
get_coordinate_weight_cpu
(
inv_h
,
inv_w
,
height
,
width
,
data_im_ptr
+
cnt
*
height
*
width
,
width
,
bp_dir
);
val
+=
weight
*
data_col_ptr
[
col_pos
];
cnt
+=
1
;
}
grad_offset
[
index
]
=
val
;
}
}
void
deformable_im2col_cpu
(
Tensor
data_im
,
Tensor
data_offset
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
ksize_h
,
const
int
ksize_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
parallel_imgs
,
const
int
deformable_group
,
Tensor
data_col
)
{
int
height_col
=
(
height
+
2
*
pad_h
-
(
dilation_h
*
(
ksize_h
-
1
)
+
1
))
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
(
dilation_w
*
(
ksize_w
-
1
)
+
1
))
/
stride_w
+
1
;
int
num_kernels
=
channels
*
height_col
*
width_col
*
parallel_imgs
;
int
channel_per_deformable_group
=
channels
/
deformable_group
;
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
data_im
.
scalar_type
(),
"deformable_im2col_cpu"
,
[
&
]
{
deformable_im2col_cpu_kernel
<
scalar_t
>
(
num_kernels
,
data_im
.
data_ptr
<
scalar_t
>
(),
data_offset
.
data_ptr
<
scalar_t
>
(),
height
,
width
,
ksize_h
,
ksize_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
parallel_imgs
,
channels
,
deformable_group
,
height_col
,
width_col
,
data_col
.
data_ptr
<
scalar_t
>
());
});
}
void
deformable_col2im_cpu
(
Tensor
data_col
,
Tensor
data_offset
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
ksize_h
,
const
int
ksize_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
parallel_imgs
,
const
int
deformable_group
,
Tensor
grad_im
)
{
// todo: make sure parallel_imgs is passed in correctly
int
height_col
=
(
height
+
2
*
pad_h
-
(
dilation_h
*
(
ksize_h
-
1
)
+
1
))
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
(
dilation_w
*
(
ksize_w
-
1
)
+
1
))
/
stride_w
+
1
;
int
num_kernels
=
channels
*
ksize_h
*
ksize_w
*
height_col
*
width_col
*
parallel_imgs
;
int
channel_per_deformable_group
=
channels
/
deformable_group
;
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
data_col
.
scalar_type
(),
"deformable_col2im_gpu"
,
([
&
]
{
const
scalar_t
*
data_col_
=
data_col
.
data_ptr
<
scalar_t
>
();
const
scalar_t
*
data_offset_
=
data_offset
.
data_ptr
<
scalar_t
>
();
scalar_t
*
grad_im_
=
grad_im
.
data_ptr
<
scalar_t
>
();
deformable_col2im_cpu_kernel
<
scalar_t
>
(
num_kernels
,
data_col_
,
data_offset_
,
channels
,
height
,
width
,
ksize_h
,
ksize_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
parallel_imgs
,
deformable_group
,
height_col
,
width_col
,
grad_im_
);
}));
}
void
deformable_col2im_coord_cpu
(
Tensor
data_col
,
Tensor
data_im
,
Tensor
data_offset
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
ksize_h
,
const
int
ksize_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
parallel_imgs
,
const
int
deformable_group
,
Tensor
grad_offset
)
{
int
height_col
=
(
height
+
2
*
pad_h
-
(
dilation_h
*
(
ksize_h
-
1
)
+
1
))
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
(
dilation_w
*
(
ksize_w
-
1
)
+
1
))
/
stride_w
+
1
;
int
num_kernels
=
height_col
*
width_col
*
2
*
ksize_h
*
ksize_w
*
deformable_group
*
parallel_imgs
;
int
channel_per_deformable_group
=
channels
*
ksize_h
*
ksize_w
/
deformable_group
;
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
data_col
.
scalar_type
(),
"deformable_col2im_coord_cpu"
,
([
&
]
{
const
scalar_t
*
data_col_
=
data_col
.
data_ptr
<
scalar_t
>
();
const
scalar_t
*
data_im_
=
data_im
.
data_ptr
<
scalar_t
>
();
const
scalar_t
*
data_offset_
=
data_offset
.
data_ptr
<
scalar_t
>
();
scalar_t
*
grad_offset_
=
grad_offset
.
data_ptr
<
scalar_t
>
();
deformable_col2im_coord_cpu_kernel
<
scalar_t
>
(
num_kernels
,
data_col_
,
data_im_
,
data_offset_
,
channels
,
height
,
width
,
ksize_h
,
ksize_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
parallel_imgs
,
2
*
ksize_h
*
ksize_w
*
deformable_group
,
deformable_group
,
height_col
,
width_col
,
grad_offset_
);
}));
}
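Since the deleted file is summarized rather than reproduced above, here is a standalone restatement of its central helper, deformable_im2col_bilinear_cpu: sample an image at a fractional (h, w) by blending the four surrounding pixels, with out-of-range samples contributing 0. Raw float pointers replace the Tensor accessors of the original purely for self-containment.

#include <cmath>
#include <cstdio>

// Bilinear sampling at a fractional (h, w); out-of-range samples contribute 0.
float bilinear_sample(const float* input, int data_width, int height, int width,
                      float h, float w) {
  if (h <= -1 || height <= h || w <= -1 || width <= w) return 0.f;
  int h_low = static_cast<int>(std::floor(h));
  int w_low = static_cast<int>(std::floor(w));
  int h_high = h_low + 1, w_high = w_low + 1;
  float lh = h - h_low, lw = w - w_low;  // distance to the low corner
  float hh = 1 - lh, hw = 1 - lw;        // complementary weights
  float v1 = (h_low >= 0 && w_low >= 0) ? input[h_low * data_width + w_low] : 0.f;
  float v2 = (h_low >= 0 && w_high <= width - 1)
                 ? input[h_low * data_width + w_high] : 0.f;
  float v3 = (h_high <= height - 1 && w_low >= 0)
                 ? input[h_high * data_width + w_low] : 0.f;
  float v4 = (h_high <= height - 1 && w_high <= width - 1)
                 ? input[h_high * data_width + w_high] : 0.f;
  return hh * hw * v1 + hh * lw * v2 + lh * hw * v3 + lh * lw * v4;
}

int main() {
  const float img[4] = {0.f, 1.f, 2.f, 3.f};  // 2x2 image, row-major
  // Sampling at the centre blends all four pixels equally: (0+1+2+3)/4 = 1.5.
  std::printf("%.2f\n", bilinear_sample(img, 2, 2, 2, 0.5f, 0.5f));
  return 0;
}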
mmcv/ops/csrc/parrots/deform_roi_pool.cpp
View file @
a4dc2a72
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, input, rois, offset,
                       output, pooled_height, pooled_width, spatial_scale,
                       sampling_ratio, gamma);
}

void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_backward_impl, grad_output, input, rois,
                       offset, grad_input, grad_offset, pooled_height,
                       pooled_width, spatial_scale, sampling_ratio, gamma);
}

void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset,
                             Tensor output, int pooled_height,
                             int pooled_width, float spatial_scale,
                             int sampling_ratio, float gamma) {
  deform_roi_pool_forward_impl(input, rois, offset, output, pooled_height,
                               pooled_width, spatial_scale, sampling_ratio,
                               gamma);
}

void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
                              Tensor offset, Tensor grad_input,
                              Tensor grad_offset, int pooled_height,
                              int pooled_width, float spatial_scale,
                              int sampling_ratio, float gamma) {
  deform_roi_pool_backward_impl(grad_output, input, rois, offset, grad_input,
                                grad_offset, pooled_height, pooled_width,
                                spatial_scale, sampling_ratio, gamma);
}
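The *_impl functions above only forward their arguments to whatever backend is registered for the tensors' device; the CUDA launchers themselves now live in cudabind.cpp. A minimal sketch of how a backend could hook itself in, assuming a registration macro of the form REGISTER_DEVICE_IMPL(key, DEVICE, function) provided by pytorch_device_registry.hpp (the macro name and signature are an assumption here, since that header is not part of this diff; the launcher declaration matches the one removed from this file):

// Hypothetical backend file (sketch only): register a CUDA implementation so
// that DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, ...) resolves to it
// when the input tensors live on a CUDA device.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois,
                                            Tensor offset, Tensor output,
                                            int pooled_height,
                                            int pooled_width,
                                            float spatial_scale,
                                            int sampling_ratio, float gamma);

void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DeformRoIPoolForwardCUDAKernelLauncher(input, rois, offset, output,
                                         pooled_height, pooled_width,
                                         spatial_scale, sampling_ratio, gamma);
}

// Assumed registration macro; the real one is defined in
// pytorch_device_registry.hpp and used by cudabind.cpp.
REGISTER_DEVICE_IMPL(deform_roi_pool_forward_impl, CUDA,
                     deform_roi_pool_forward_cuda);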
mmcv/ops/csrc/parrots/focal_loss.cpp
View file @
a4dc2a72
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target,
                                     Tensor weight, Tensor output,
                                     float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, input, target, weight,
                       grad_input, gamma, alpha);
}

void softmax_focal_loss_forward_impl(Tensor input, Tensor target,
                                     Tensor weight, Tensor output,
                                     float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_backward_impl, input, target, weight,
                       buff, grad_input, gamma, alpha);
}

void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor grad_input, float gamma, float alpha) {
  sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma,
                                   alpha);
}

void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  softmax_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor buff, Tensor grad_input, float gamma,
                                 float alpha) {
  softmax_focal_loss_backward_impl(input, target, weight, buff, grad_input,
                                   gamma, alpha);
}
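As a reminder of what the dispatched kernels compute, sigmoid focal loss per element is FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t) with p = sigmoid(x). A scalar reference sketch of that formula (not part of the file; the per-class weight handling of the real kernel is deliberately omitted):

#include <cmath>

// Reference value of sigmoid focal loss for one logit x and binary label y.
float sigmoid_focal_loss_ref(float x, int y, float gamma, float alpha) {
  const float p = 1.0f / (1.0f + std::exp(-x));    // sigmoid probability
  const float pt = (y == 1) ? p : 1.0f - p;        // prob. of the true class
  const float at = (y == 1) ? alpha : 1.0f - alpha;  // class-balancing term
  return -at * std::pow(1.0f - pt, gamma) * std::log(pt);
}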
mmcv/ops/csrc/parrots/furthest_point_sample.cpp
View file @
a4dc2a72
@@ -2,61 +2,33 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void furthest_point_sampling_forward_impl(Tensor points_tensor,
                                          Tensor temp_tensor,
                                          Tensor idx_tensor, int b, int n,
                                          int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_forward_impl, points_tensor,
                       temp_tensor, idx_tensor, b, n, m);
}

void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl,
                       points_tensor, temp_tensor, idx_tensor, b, n, m);
}

void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m) {
  furthest_point_sampling_forward_impl(points_tensor, temp_tensor, idx_tensor,
                                       b, n, m);
}

void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m) {
  furthest_point_sampling_with_dist_forward_impl(points_tensor, temp_tensor,
                                                 idx_tensor, b, n, m);
}
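The wrappers above now pass the three tensors plus (b, n, m) straight to the registered backend instead of unpacking raw pointers. For intuition, farthest point sampling greedily keeps the point that is farthest from everything selected so far. A single-batch CPU sketch, assuming the n x 3 xyz layout used by the PointNet++ reference code (the helper name is hypothetical):

#include <cfloat>
#include <vector>

// Greedy farthest point sampling over one batch of n xyz points, picking m
// indices. `dist` plays the role of the temp buffer in the op above.
std::vector<int> fps_cpu(const float* xyz, int n, int m) {
  std::vector<float> dist(n, FLT_MAX);
  std::vector<int> idxs(m);
  int farthest = 0;
  for (int i = 0; i < m; ++i) {
    idxs[i] = farthest;
    const float* c = xyz + 3 * farthest;
    float best = -1.0f;
    for (int j = 0; j < n; ++j) {
      const float* p = xyz + 3 * j;
      float d = (p[0] - c[0]) * (p[0] - c[0]) + (p[1] - c[1]) * (p[1] - c[1]) +
                (p[2] - c[2]) * (p[2] - c[2]);
      if (d < dist[j]) dist[j] = d;  // distance to the selected set
      if (dist[j] > best) {          // next centre = farthest remaining point
        best = dist[j];
        farthest = j;
      }
    }
  }
  return idxs;
}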
mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp
View file @
a4dc2a72
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act.cpp

/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================
1. Definitions
"Licensor" means any person or entity that distributes its Work.
"Software" means the original work of authorship made available under
this License.
"Work" means the Software and any additions to or derivative works of
the Software that are made available under this License.
The terms "reproduce," "reproduction," "derivative works," and
"distribution" have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.
Works, including the Software, are "made available" under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.
2. License Grants
2.1 Copyright Grant. Subject to the terms and conditions of this
License, each Licensor grants to you a perpetual, worldwide,
non-exclusive, royalty-free, copyright license to reproduce,
prepare derivative works of, publicly display, publicly perform,
sublicense and distribute its Work and any resulting derivative
works in any form.
3. Limitations
3.1 Redistribution. You may reproduce or distribute the Work only
if (a) you do so under this License, (b) you include a complete
copy of this License with your distribution, and (c) you retain
without modification any copyright, patent, trademark, or
attribution notices that are present in the Work.
3.2 Derivative Works. You may specify that additional or different
terms apply to the use, reproduction, and distribution of your
derivative works of the Work ("Your Terms") only if (a) Your Terms
provide that the use limitation in Section 3.3 applies to your
derivative works, and (b) you identify the specific derivative
works that are subject to Your Terms. Notwithstanding Your Terms,
this License (including the redistribution requirements in Section
3.1) will continue to apply to the Work itself.
3.3 Use Limitation. The Work and any derivative works thereof only
may be used or intended for use non-commercially. Notwithstanding
the foregoing, NVIDIA and its affiliates may use the Work and any
derivative works commercially. As used herein, "non-commercially"
means for research or evaluation purposes only.
3.4 Patent Claims. If you bring or threaten to bring a patent claim
against any Licensor (including any claim, cross-claim or
counterclaim in a lawsuit) to enforce any patents that you allege
are infringed by any Work, then your rights under this License from
such Licensor (including the grant in Section 2.1) will terminate
immediately.
3.5 Trademarks. This License does not grant any rights to use any
Licensor’s or its affiliates’ names, logos, or trademarks, except
as necessary to reproduce the notices described in this License.
3.6 Termination. If you violate any term of this License, then your
rights under this License (including the grant in Section 2.1) will
terminate immediately.
4. Disclaimer of Warranty.
THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.
5. Limitation of Liability.
EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.
=======================================================================
*/
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
torch
::
Tensor
fused_bias_leakyrelu_op_impl
(
const
torch
::
Tensor
&
input
,
const
torch
::
Tensor
&
bias
,
const
torch
::
Tensor
&
refer
,
int
act
,
int
grad
,
float
alpha
,
float
scale
)
{
return
DISPATCH_DEVICE_IMPL
(
fused_bias_leakyrelu_op_impl
,
input
,
bias
,
refer
,
act
,
grad
,
alpha
,
scale
);
}
torch
::
Tensor
fused_bias_leakyrelu
(
const
torch
::
Tensor
&
input
,
torch
::
Tensor
fused_bias_leakyrelu
(
const
torch
::
Tensor
&
input
,
const
torch
::
Tensor
&
bias
,
const
torch
::
Tensor
&
bias
,
const
torch
::
Tensor
&
refer
,
int
act
,
const
torch
::
Tensor
&
refer
,
int
act
,
int
grad
,
float
alpha
,
float
scale
)
{
int
grad
,
float
alpha
,
float
scale
)
{
#ifdef MMCV_WITH_CUDA
return
fused_bias_leakyrelu_op_impl
(
input
,
bias
,
refer
,
act
,
grad
,
alpha
,
CHECK_CUDA
(
input
);
scale
);
CHECK_CUDA
(
bias
);
return
fused_bias_leakyrelu_op
(
input
,
bias
,
refer
,
act
,
grad
,
alpha
,
scale
);
#else
AT_ERROR
(
"Fused bias leakyrelu is not compiled with GPU support"
);
#endif
}
}
mmcv/ops/csrc/parrots/gather_points.cpp
View file @
a4dc2a72
#include "pytorch_cpp_helper.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void
gather_points_forward_impl
(
int
b
,
int
c
,
int
n
,
int
npoints
,
void
GatherPointsForwardCUDAKernelLauncher
(
int
b
,
int
c
,
int
n
,
int
npoints
,
const
Tensor
points
,
const
Tensor
idx
,
Tensor
out
);
void
gather_points_forward_cuda
(
int
b
,
int
c
,
int
n
,
int
npoints
,
const
Tensor
points
,
const
Tensor
idx
,
const
Tensor
points
,
const
Tensor
idx
,
Tensor
out
)
{
Tensor
out
)
{
GatherPointsForwardCUDAKernelLauncher
(
b
,
c
,
n
,
npoints
,
points
,
idx
,
out
);
DISPATCH_DEVICE_IMPL
(
gather_points_forward_impl
,
b
,
c
,
n
,
npoints
,
points
,
};
idx
,
out
);
}
void
GatherPointsBackwardCUDAKernelLauncher
(
int
b
,
int
c
,
int
n
,
int
npoints
,
const
Tensor
grad_out
,
const
Tensor
idx
,
Tensor
grad_points
);
void
gather_points_backward_
cuda
(
int
b
,
int
c
,
int
n
,
int
npoints
,
void
gather_points_backward_
impl
(
int
b
,
int
c
,
int
n
,
int
npoints
,
const
Tensor
grad_out
,
const
Tensor
idx
,
const
Tensor
grad_out
,
const
Tensor
idx
,
Tensor
grad_points
)
{
Tensor
grad_points
)
{
GatherPointsBackwardCUDAKernelLauncher
(
b
,
c
,
n
,
npoints
,
grad_out
,
idx
,
DISPATCH_DEVICE_IMPL
(
gather_points_backward_impl
,
b
,
c
,
n
,
npoints
,
grad_out
,
grad_points
);
idx
,
grad_points
);
};
}
#endif
void
gather_points_forward
(
Tensor
points_tensor
,
Tensor
idx_tensor
,
void
gather_points_forward
(
Tensor
points_tensor
,
Tensor
idx_tensor
,
Tensor
out_tensor
,
int
b
,
int
c
,
int
n
,
Tensor
out_tensor
,
int
b
,
int
c
,
int
n
,
int
npoints
)
{
int
npoints
)
{
if
(
points_tensor
.
device
().
is_cuda
())
{
gather_points_forward_impl
(
b
,
c
,
n
,
npoints
,
points_tensor
,
idx_tensor
,
#ifdef MMCV_WITH_CUDA
out_tensor
);
gather_points_forward_cuda
(
b
,
c
,
n
,
npoints
,
points_tensor
,
idx_tensor
,
out_tensor
);
#else
AT_ERROR
(
"gather_points is not compiled with GPU support"
);
#endif
}
else
{
AT_ERROR
(
"gather_points is not implemented on CPU"
);
}
}
}
void
gather_points_backward
(
Tensor
grad_out_tensor
,
Tensor
idx_tensor
,
void
gather_points_backward
(
Tensor
grad_out_tensor
,
Tensor
idx_tensor
,
Tensor
grad_points_tensor
,
int
b
,
int
c
,
int
n
,
Tensor
grad_points_tensor
,
int
b
,
int
c
,
int
n
,
int
npoints
)
{
int
npoints
)
{
if
(
grad_out_tensor
.
device
().
is_cuda
())
{
gather_points_backward_impl
(
b
,
c
,
n
,
npoints
,
grad_out_tensor
,
idx_tensor
,
#ifdef MMCV_WITH_CUDA
grad_points_tensor
);
gather_points_backward_cuda
(
b
,
c
,
n
,
npoints
,
grad_out_tensor
,
idx_tensor
,
grad_points_tensor
);
#else
AT_ERROR
(
"gather_points is not compiled with GPU support"
);
#endif
}
else
{
AT_ERROR
(
"gather_points is not implemented on CPU"
);
}
}
}
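Written out for clarity, the dispatched kernel gathers point features at the given indices. The sketch below assumes the PointNet++ layout convention (points is (b, c, n), idx is (b, npoints), out is (b, c, npoints)); treat that layout, and the helper name, as assumptions rather than part of this file:

// Reference semantics of gather_points_forward on contiguous float buffers:
//   out[bi][ci][k] = points[bi][ci][idx[bi][k]]
void gather_points_ref(int b, int c, int n, int npoints, const float* points,
                       const int* idx, float* out) {
  for (int bi = 0; bi < b; ++bi)
    for (int ci = 0; ci < c; ++ci)
      for (int k = 0; k < npoints; ++k)
        out[(bi * c + ci) * npoints + k] =
            points[(bi * c + ci) * n + idx[bi * npoints + k]];
}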
mmcv/ops/csrc/parrots/group_points.cpp
View file @
a4dc2a72
@@ -3,56 +3,32 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out) {
  DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample,
                       points, idx, out);
}

void group_points_backward_impl(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(group_points_backward_impl, b, c, n, npoints, nsample,
                       grad_out, idx, grad_points);
}

void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
                          Tensor out_tensor, int b, int c, int n, int npoints,
                          int nsample) {
  DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample,
                       points_tensor, idx_tensor, out_tensor);
}

void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                           Tensor grad_points_tensor, int b, int c, int n,
                           int npoints, int nsample) {
  group_points_backward_impl(b, c, n, npoints, nsample, grad_out_tensor,
                             idx_tensor, grad_points_tensor);
}
mmcv/ops/csrc/parrots/info.cpp
0 → 100644
View file @
a4dc2a72
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
#include "pytorch_cpp_helper.hpp"

#ifdef MMCV_WITH_CUDA
#ifndef HIP_DIFF
#include <cuda_runtime_api.h>
int get_cudart_version() { return CUDART_VERSION; }
#endif
#endif

std::string get_compiling_cuda_version() {
#ifdef MMCV_WITH_CUDA
#ifndef HIP_DIFF
  std::ostringstream oss;
  // copied from
  // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
  auto printCudaStyleVersion = [&](int v) {
    oss << (v / 1000) << "." << (v / 10 % 100);
    if (v % 10 != 0) {
      oss << "." << (v % 10);
    }
  };
  printCudaStyleVersion(get_cudart_version());
  return oss.str();
#else
  return std::string("rocm not available");
#endif
#else
  return std::string("not available");
#endif
}

// similar to
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
std::string get_compiler_version() {
  std::ostringstream ss;
#if defined(__GNUC__)
#ifndef __clang__
  { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; }
#endif
#endif

#if defined(__clang_major__)
  {
    ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
       << __clang_patchlevel__;
  }
#endif

#if defined(_MSC_VER)
  { ss << "MSVC " << _MSC_FULL_VER; }
#endif
  return ss.str();
}
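printCudaStyleVersion turns the integer CUDART_VERSION into the usual dotted form, e.g. 11030 becomes "11.3" and 10010 becomes "10.1". A standalone check of the same arithmetic (names chosen here only for illustration):

#include <cassert>
#include <sstream>
#include <string>

// Same formatting rule as the lambda in get_compiling_cuda_version().
std::string cuda_version_string(int v) {
  std::ostringstream oss;
  oss << (v / 1000) << "." << (v / 10 % 100);
  if (v % 10 != 0) oss << "." << (v % 10);
  return oss.str();
}

int main() {
  assert(cuda_version_string(11030) == "11.3");  // CUDA 11.3
  assert(cuda_version_string(10010) == "10.1");  // CUDA 10.1
  return 0;
}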
mmcv/ops/csrc/parrots/iou3d.cpp
View file @
a4dc2a72
@@ -8,68 +8,35 @@ All Rights Reserved 2019-2020.
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;

void iou3d_boxes_overlap_bev_forward_impl(const int num_a,
                                          const Tensor boxes_a,
                                          const int num_b,
                                          const Tensor boxes_b,
                                          Tensor ans_overlap) {
  DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a,
                       num_b, boxes_b, ans_overlap);
}

void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou) {
  DISPATCH_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, num_a, boxes_a, num_b,
                       boxes_b, ans_iou);
}

void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long *mask,
                            int boxes_num, float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

void iou3d_nms_normal_forward_impl(const Tensor boxes,
                                   unsigned long long *mask, int boxes_num,
                                   float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms_normal_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                     Tensor ans_overlap) {
@@ -77,23 +44,11 @@ void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
  // params boxes_b: (M, 5)
  // params ans_overlap: (N, M)
  int num_a = boxes_a.size(0);
  int num_b = boxes_b.size(0);

  iou3d_boxes_overlap_bev_forward_impl(num_a, boxes_a, num_b, boxes_b,
                                       ans_overlap);
}

void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
@@ -101,77 +56,52 @@ void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
  // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
  // params boxes_b: (M, 5)
  // params ans_overlap: (N, M)
  int num_a = boxes_a.size(0);
  int num_b = boxes_b.size(0);

  iou3d_boxes_iou_bev_forward_impl(num_a, boxes_a, num_b, boxes_b, ans_iou);
}

void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                       float nms_overlap_thresh) {
  // params boxes: (N, 5) [x1, y1, x2, y2, ry]
  // params keep: (N)
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);

  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh);

  at::Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long *mask_host =
      (unsigned long long *)mask_cpu.data_ptr<int64_t>();

  std::vector<unsigned long long> remv_cpu(col_blocks);
  memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks);

  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  *keep_num_data = num_to_keep;
}
@@ -180,53 +110,42 @@ void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
  // params boxes: (N, 5) [x1, y1, x2, y2, ry]
  // params keep: (N)
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);

  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms_normal_forward_impl(boxes, mask_data, boxes_num,
                                nms_overlap_thresh);

  at::Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long *mask_host =
      (unsigned long long *)mask_cpu.data_ptr<int64_t>();

  std::vector<unsigned long long> remv_cpu(col_blocks);
  memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks);

  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  *keep_num_data = num_to_keep;
}
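In both NMS wrappers the backend kernel is expected to fill `mask` with one bit per (box i, box j) pair: bit `inblock` of row i, block `nblock` marks that box j = nblock * THREADS_PER_BLOCK_NMS + inblock overlaps box i above the threshold, and the host loop above then greedily keeps each box whose bit has not yet been set in `remv_cpu`. A tiny standalone decode of that scheme (3 boxes, so a single 64-bit block; the mask values are made up purely for illustration):

#include <cassert>
#include <vector>

int main() {
  const int threads_per_block = sizeof(unsigned long long) * 8;  // 64
  const int boxes_num = 3, col_blocks = 1;
  // Box 0 suppresses box 1 (bit 1 set in its row); box 2 overlaps nothing.
  unsigned long long mask_host[boxes_num * col_blocks] = {0b010ULL, 0ULL, 0ULL};

  std::vector<unsigned long long> remv(col_blocks, 0);
  std::vector<int> keep;
  for (int i = 0; i < boxes_num; ++i) {
    int nblock = i / threads_per_block;
    int inblock = i % threads_per_block;
    if (!(remv[nblock] & (1ULL << inblock))) {
      keep.push_back(i);
      const unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; ++j) remv[j] |= p[j];
    }
  }
  assert(keep.size() == 2 && keep[0] == 0 && keep[1] == 2);  // box 1 suppressed
  return 0;
}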