OpenDAS / MMCV · Commits

Commit fdeee889
Authored May 25, 2025 by limm
Parent: df465820

    release v1.6.1 of mmcv

Changes: 457. Showing 20 changed files with 380 additions and 2462 deletions (+380, -2462).
mmcv/ops/csrc/parrots/modulated_deform_conv.cpp           +47   -148
mmcv/ops/csrc/parrots/modulated_deform_conv_cpu.cpp        +0   -403
mmcv/ops/csrc/parrots/ms_deform_attn.cpp                  +23    -42
mmcv/ops/csrc/parrots/nms.cpp                             +16   -244
mmcv/ops/csrc/parrots/nms_rotated_cpu.cpp                  +0    -66
mmcv/ops/csrc/parrots/pixel_group.cpp                      +7   -118
mmcv/ops/csrc/parrots/points_in_boxes.cpp                 +20    -68
mmcv/ops/csrc/parrots/points_in_boxes_cpu.cpp              +0    -53
mmcv/ops/csrc/parrots/points_in_polygons.cpp              +15     -0
mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp      +28     -0
mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h         +9     -0
mmcv/ops/csrc/parrots/psamask.cpp                         +13   -228
mmcv/ops/csrc/parrots/riroi_align_rotated.cpp             +42     -0
mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp     +86     -0
mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h       +18     -0
mmcv/ops/csrc/parrots/roi_align.cpp                       +15   -104
mmcv/ops/csrc/parrots/roi_align_cpu.cpp                    +0   -430
mmcv/ops/csrc/parrots/roi_align_rotated.cpp               +27   -127
mmcv/ops/csrc/parrots/roi_align_rotated_cpu.cpp            +0   -417
mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp       +14    -14
Too many changes to show: to preserve performance, only 457 of 457+ files are displayed.
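The file-by-file diffs below repeat one pattern: the per-device entry points (the *_cuda and *_cpu functions and their #ifdef MMCV_WITH_CUDA branches) are removed, and each operator instead exposes a single *_impl function that forwards to DISPATCH_DEVICE_IMPL from pytorch_device_registry.hpp, so device backends are looked up at call time rather than hard-wired. As a rough illustration of that idea only, here is a minimal, self-contained sketch of a device-dispatch registry; the names (nms_registry, register_nms_impl, NmsFn) are invented for the example and this is not MMCV's actual registry code.

// Minimal sketch of a device-dispatch registry (illustrative only; not the
// MMCV implementation). Backends register a callable under a device tag, and
// the generic *_impl entry point looks it up at call time.
#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

using NmsFn = std::function<int(int /*nboxes*/, float /*iou_threshold*/)>;

std::map<std::string, NmsFn>& nms_registry() {
  static std::map<std::string, NmsFn> registry;
  return registry;
}

// What a REGISTER_DEVICE_IMPL-style macro boils down to: insert a backend.
bool register_nms_impl(const std::string& device, NmsFn fn) {
  return nms_registry().emplace(device, std::move(fn)).second;
}

// What a DISPATCH_DEVICE_IMPL-style macro boils down to: look up and call.
int nms_impl(const std::string& device, int nboxes, float iou_threshold) {
  auto it = nms_registry().find(device);
  if (it == nms_registry().end())
    throw std::runtime_error("nms has no implementation for device " + device);
  return it->second(nboxes, iou_threshold);
}

int main() {
  // A toy "cpu" backend that keeps every box.
  register_nms_impl("cpu", [](int nboxes, float) { return nboxes; });
  std::cout << nms_impl("cpu", 8, 0.5f) << "\n";  // prints 8
}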
mmcv/ops/csrc/parrots/modulated_deform_conv.cpp

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

#ifdef MMCV_WITH_CUDA
void modulated_deformable_im2col_cuda(
void modulated_deformable_im2col_impl(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);

void modulated_deformable_col2im_cuda(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

void modulated_deformable_col2im_coord_cuda(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);
#endif

void modulated_deformable_im2col_cpu(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);
    const int dilation_w, const int deformable_group, Tensor data_col) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_im2col_impl, data_im, data_offset,
                       data_mask, batch_size, channels, height_im, width_im,
                       height_col, width_col, kernel_h, kernel_w, pad_h, pad_w,
                       stride_h, stride_w, dilation_h, dilation_w,
                       deformable_group, data_col);
}

void modulated_deformable_col2im_cpu(
void modulated_deformable_col2im_impl(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);
    const int dilation_w, const int deformable_group, Tensor grad_im) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_impl, data_col, data_offset,
                       data_mask, batch_size, channels, height_im, width_im,
                       height_col, width_col, kernel_h, kernel_w, pad_h, pad_w,
                       stride_h, stride_w, dilation_h, dilation_w,
                       deformable_group, grad_im);
}

void modulated_deformable_col2im_coord_cpu(
void modulated_deformable_col2im_coord_impl(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);
    Tensor grad_offset, Tensor grad_mask) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, data_col,
                       data_im, data_offset, data_mask, batch_size, channels,
                       height_im, width_im, height_col, width_col, kernel_h,
                       kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                       dilation_w, deformable_group, grad_offset, grad_mask);
}

void modulated_deform_conv_forward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,

@@ -61,31 +51,6 @@ void modulated_deform_conv_forward(
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(weight);
    CHECK_CUDA_INPUT(bias);
    CHECK_CUDA_INPUT(ones);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(mask);
    CHECK_CUDA_INPUT(output);
    CHECK_CUDA_INPUT(columns);
#else
    AT_ERROR("ModulatedDeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(weight);
    CHECK_CPU_INPUT(bias);
    CHECK_CPU_INPUT(ones);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(mask);
    CHECK_CPU_INPUT(output);
    CHECK_CPU_INPUT(columns);
  }
  at::DeviceGuard guard(input.device());
  const int batch = input.size(0);

@@ -127,19 +92,10 @@ void modulated_deform_conv_forward(
                 output.size(2), output.size(3)});
  for (int b = 0; b < batch; b++) {
    if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
      modulated_deformable_im2col_cuda(
          input[b], offset[b], mask[b], 1, channels, height, width, height_out,
          width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, deformable_group, columns);
#endif
    } else {
      modulated_deformable_im2col_cpu(
          input[b], offset[b], mask[b], 1, channels, height, width, height_out,
          width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, deformable_group, columns);
    }
    modulated_deformable_im2col_impl(
        input[b], offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, columns);
    // divide into group
    weight = weight.view({group, weight.size(0) / group, weight.size(1),

@@ -174,41 +130,6 @@ void modulated_deform_conv_backward(
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(weight);
    CHECK_CUDA_INPUT(bias);
    CHECK_CUDA_INPUT(ones);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(mask);
    CHECK_CUDA_INPUT(columns);
    CHECK_CUDA_INPUT(grad_input);
    CHECK_CUDA_INPUT(grad_weight);
    CHECK_CUDA_INPUT(grad_bias);
    CHECK_CUDA_INPUT(grad_offset);
    CHECK_CUDA_INPUT(grad_mask);
    CHECK_CUDA_INPUT(grad_output);
#else
    AT_ERROR("ModulatedDeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(weight);
    CHECK_CPU_INPUT(bias);
    CHECK_CPU_INPUT(ones);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(mask);
    CHECK_CPU_INPUT(columns);
    CHECK_CPU_INPUT(grad_input);
    CHECK_CPU_INPUT(grad_weight);
    CHECK_CPU_INPUT(grad_bias);
    CHECK_CPU_INPUT(grad_offset);
    CHECK_CPU_INPUT(grad_mask);
    CHECK_CPU_INPUT(grad_output);
  }
  at::DeviceGuard guard(input.device());
  const int batch = input.size(0);

@@ -261,46 +182,24 @@ void modulated_deform_conv_backward(
  weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
                        weight.size(3), weight.size(4)});
    if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
      // gradient w.r.t. input coordinate data
      modulated_deformable_col2im_coord_cuda(
          columns, input[b], offset[b], mask[b], 1, channels, height, width,
          height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
          stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b],
          grad_mask[b]);
      // gradient w.r.t. input data
      modulated_deformable_col2im_cuda(
          columns, offset[b], mask[b], 1, channels, height, width, height_out,
          width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, deformable_group, grad_input[b]);
      // gradient w.r.t. weight, dWeight should accumulate across the batch and
      // group
      modulated_deformable_im2col_cuda(
          input[b], offset[b], mask[b], 1, channels, height, width, height_out,
          width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, deformable_group, columns);
#endif
    } else {
      // gradient w.r.t. input coordinate data
      modulated_deformable_col2im_coord_cpu(
          columns, input[b], offset[b], mask[b], 1, channels, height, width,
          height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
          stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b],
          grad_mask[b]);
      // gradient w.r.t. input data
      modulated_deformable_col2im_cpu(
          columns, offset[b], mask[b], 1, channels, height, width, height_out,
          width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, deformable_group, grad_input[b]);
      // gradient w.r.t. weight, dWeight should accumulate across the batch and
      // group
      modulated_deformable_im2col_cpu(
          input[b], offset[b], mask[b], 1, channels, height, width, height_out,
          width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, deformable_group, columns);
    }
    // gradient w.r.t. input coordinate data
    modulated_deformable_col2im_coord_impl(
        columns, input[b], offset[b], mask[b], 1, channels, height, width,
        height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
        stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b],
        grad_mask[b]);
    // gradient w.r.t. input data
    modulated_deformable_col2im_impl(
        columns, offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, grad_input[b]);
    // gradient w.r.t. weight, dWeight should accumulate across the batch and
    // group
    modulated_deformable_im2col_impl(
        input[b], offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, columns);
    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    grad_weight = grad_weight.view({group, grad_weight.size(0) / group,
...
mmcv/ops/csrc/parrots/modulated_deform_conv_cpu.cpp  (deleted, 100644 → 0)

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"

template <typename T>
T dmcn_im2col_bilinear_cpu(const T *input, const int data_width,
                           const int height, const int width, T h, T w) {
  int h_low = floorf(h);
  int w_low = floorf(w);
  int h_high = h_low + 1;
  int w_high = w_low + 1;

  T lh = h - h_low;
  T lw = w - w_low;
  T hh = 1 - lh, hw = 1 - lw;

  T v1 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
  T v2 = 0;
  if (h_low >= 0 && w_high <= width - 1)
    v2 = input[h_low * data_width + w_high];
  T v3 = 0;
  if (h_high <= height - 1 && w_low >= 0)
    v3 = input[h_high * data_width + w_low];
  T v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1)
    v4 = input[h_high * data_width + w_high];

  T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

template <typename T>
T dmcn_get_gradient_weight_cpu(T argmax_h, T argmax_w, const int h,
                               const int w, const int height,
                               const int width) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;
  if (h == argmax_h_low && w == argmax_w_low)
    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  if (h == argmax_h_low && w == argmax_w_high)
    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  if (h == argmax_h_high && w == argmax_w_low)
    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  if (h == argmax_h_high && w == argmax_w_high)
    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  return weight;
}

template <typename T>
T dmcn_get_coordinate_weight_cpu(T argmax_h, T argmax_w, const int height,
                                 const int width, const T *im_data,
                                 const int data_width, const int bp_dir) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;

  if (bp_dir == 0) {
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += -1 * (argmax_w - argmax_w_low) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_w - argmax_w_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  } else if (bp_dir == 1) {
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += -1 * (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  }

  return weight;
}

template <typename T>
void modulated_deformable_im2col_cpu_kernel(
    const int n, const T *data_im, const T *data_offset, const T *data_mask,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T *data_col) {
  for (int index = 0; index < n; index++) {
    // index index of output matrix
    const int w_col = index % width_col;
    const int h_col = (index / width_col) % height_col;
    const int b_col = (index / width_col / height_col) % batch_size;
    const int c_im = (index / width_col / height_col) / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    // compute deformable group index
    const int deformable_group_index = c_im / channel_per_deformable_group;

    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;

    T *data_col_ptr =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T *data_im_ptr =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T *data_offset_ptr =
        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b_col * deformable_group + deformable_group_index) *
                        kernel_h * kernel_w * height_col * width_col;

    for (int i = 0; i < kernel_h; ++i) {
      for (int j = 0; j < kernel_w; ++j) {
        const int data_offset_h_ptr =
            ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
        const int data_offset_w_ptr =
            ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
            w_col;
        const int data_mask_hw_ptr =
            ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
        const T offset_h = data_offset_ptr[data_offset_h_ptr];
        const T offset_w = data_offset_ptr[data_offset_w_ptr];
        const T mask = data_mask_ptr[data_mask_hw_ptr];
        T val = static_cast<T>(0);
        const T h_im = h_in + i * dilation_h + offset_h;
        const T w_im = w_in + j * dilation_w + offset_w;
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
          val = dmcn_im2col_bilinear_cpu(data_im_ptr, width, height, width,
                                         h_im, w_im);
        *data_col_ptr = val * mask;
        data_col_ptr += batch_size * height_col * width_col;
      }
    }
  }
}

template <typename T>
void modulated_deformable_col2im_cpu_kernel(
    const int n, const T *data_col, const T *data_offset, const T *data_mask,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  for (int index = 0; index < n; index++) {
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;
    // compute the start and end of the output
    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) * kernel_h *
                        kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const int data_mask_hw_ptr =
        ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T mask = data_mask_ptr[data_mask_hw_ptr];
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[index] * mask;
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight = dmcn_get_gradient_weight_cpu(cur_inv_h_data,
                                                  cur_inv_w_data, cur_h + dy,
                                                  cur_w + dx, height, width);
          *(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad;
        }
      }
    }
  }
}

template <typename T>
void modulated_deformable_col2im_coord_cpu_kernel(
    const int n, const T *data_col, const T *data_im, const T *data_offset,
    const T *data_mask, const int channels, const int height, const int width,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int channel_per_deformable_group,
    const int batch_size, const int offset_channels,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_offset, T *grad_mask) {
  for (int index = 0; index < n; index++) {
    T val = 0, mval = 0;
    int w = index % width_col;
    int h = (index / width_col) % height_col;
    int c = (index / width_col / height_col) % offset_channels;
    int b = (index / width_col / height_col) / offset_channels;
    // compute the start and end of the output
    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    int cnt = 0;
    const T *data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T *data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) * kernel_h *
                        kernel_w * height_col * width_col;

    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      const int bp_dir = offset_c % 2;

      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const int data_mask_hw_ptr =
          (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      const T mask = data_mask_ptr[data_mask_hw_ptr];
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
        inv_h = inv_w = -2;
      else
        mval += data_col_ptr[col_pos] *
                dmcn_im2col_bilinear_cpu(data_im_ptr + cnt * height * width,
                                         width, height, width, inv_h, inv_w);
      const T weight = dmcn_get_coordinate_weight_cpu(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos] * mask;
      cnt += 1;
    }
    // KERNEL_ASSIGN(grad_offset[index], offset_req, val);
    grad_offset[index] = val;
    if (offset_c % 2 == 0)
      // KERNEL_ASSIGN(grad_mask[(((b * deformable_group +
      // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) *
      // height_col + h) * width_col + w], mask_req, mval);
      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h *
                      kernel_w +
                  offset_c / 2) *
                     height_col +
                 h) *
                    width_col +
                w] = mval;
  }
}

void modulated_deformable_im2col_cpu(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col) {
  // num_axes should be smaller than block size
  const int channel_per_deformable_group = channels / deformable_group;
  const int num_kernels = channels * batch_size * height_col * width_col;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_im.scalar_type(), "modulated_deformable_im2col_cpu", ([&] {
        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *data_col_ = data_col.data_ptr<scalar_t>();

        modulated_deformable_im2col_cpu_kernel(
            num_kernels, data_im_, data_offset_, data_mask_, height_im,
            width_im, kernel_h, kenerl_w, pad_h, pad_w, stride_h, stride_w,
            dilation_h, dilation_w, channel_per_deformable_group, batch_size,
            channels, deformable_group, height_col, width_col, data_col_);
      }));
}

void modulated_deformable_col2im_cpu(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im) {
  const int channel_per_deformable_group = channels / deformable_group;
  const int num_kernels =
      channels * kernel_h * kernel_w * batch_size * height_col * width_col;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "modulated_deformable_col2im_cpu", ([&] {
        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

        modulated_deformable_col2im_cpu_kernel(
            num_kernels, data_col_, data_offset_, data_mask_, channels,
            height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h,
            stride_w, dilation_h, dilation_w, channel_per_deformable_group,
            batch_size, deformable_group, height_col, width_col, grad_im_);
      }));
}

void modulated_deformable_col2im_coord_cpu(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask) {
  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h *
                          kernel_w * deformable_group;
  const int channel_per_deformable_group =
      channels * kernel_h * kernel_w / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "modulated_deformable_col2im_coord_cpu", ([&] {
        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
        scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();

        modulated_deformable_col2im_coord_cpu_kernel(
            num_kernels, data_col_, data_im_, data_offset_, data_mask_,
            channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w,
            stride_h, stride_w, dilation_h, dilation_w,
            channel_per_deformable_group, batch_size,
            2 * kernel_h * kernel_w * deformable_group, deformable_group,
            height_col, width_col, grad_offset_, grad_mask_);
      }));
}
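The deleted CPU file above is built around dmcn_im2col_bilinear_cpu, which samples the feature map at a fractional (h, w) location as a weighted sum of its four integer neighbours, with weights (1-lh)(1-lw), (1-lh)lw, lh(1-lw), lh*lw. As a standalone sketch of that arithmetic only (toy data and a hypothetical helper name, independent of the Tensor-based code above):

// Standalone check of the bilinear weighting used by dmcn_im2col_bilinear_cpu:
// val = w1*v1 + w2*v2 + w3*v3 + w4*v4, with out-of-range neighbours read as 0.
#include <cmath>
#include <cstdio>

float bilinear(const float* input, int data_width, int height, int width,
               float h, float w) {
  int h_low = std::floor(h), w_low = std::floor(w);
  int h_high = h_low + 1, w_high = w_low + 1;
  float lh = h - h_low, lw = w - w_low, hh = 1 - lh, hw = 1 - lw;
  // Zero outside the map, mirroring the bounds checks in the kernel above.
  auto at = [&](int r, int c) {
    return (r >= 0 && r <= height - 1 && c >= 0 && c <= width - 1)
               ? input[r * data_width + c]
               : 0.f;
  };
  return hh * hw * at(h_low, w_low) + hh * lw * at(h_low, w_high) +
         lh * hw * at(h_high, w_low) + lh * lw * at(h_high, w_high);
}

int main() {
  const float img[4] = {0.f, 1.f, 2.f, 3.f};  // 2x2 map, row-major
  // Sampling the exact midpoint averages all four values: prints 1.50.
  std::printf("%.2f\n", bilinear(img, 2, 2, 2, 0.5f, 0.5f));
}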
mmcv/ops/csrc/parrots/ms_deform_attn.cpp

@@ -10,42 +10,39 @@
*/
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

#ifdef MMCV_WITH_CUDA
Tensor ms_deform_attn_cuda_forward(const Tensor &value,
Tensor ms_deform_attn_impl_forward(const Tensor &value,
                                   const Tensor &spatial_shapes,
                                   const Tensor &level_start_index,
                                   const Tensor &sampling_loc,
                                   const Tensor &attn_weight,
                                   const int im2col_step);
                                   const int im2col_step) {
  return DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_forward, value,
                              spatial_shapes, level_start_index, sampling_loc,
                              attn_weight, im2col_step);
}

void ms_deform_attn_cuda_backward(
void ms_deform_attn_impl_backward(
    const Tensor &value, const Tensor &spatial_shapes,
    const Tensor &level_start_index, const Tensor &sampling_loc,
    const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value,
    Tensor &grad_sampling_loc, Tensor &grad_attn_weight,
    const int im2col_step);
#endif
    Tensor &grad_sampling_loc, Tensor &grad_attn_weight,
    const int im2col_step) {
  DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_backward, value, spatial_shapes,
                       level_start_index, sampling_loc, attn_weight,
                       grad_output, grad_value, grad_sampling_loc,
                       grad_attn_weight, im2col_step);
}

Tensor ms_deform_attn_forward(const Tensor &value,
                              const Tensor &spatial_shapes,
                              const Tensor &level_start_index,
                              const Tensor &sampling_loc,
                              const Tensor &attn_weight,
                              const int im2col_step) {
  if (value.type().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(value)
    CHECK_CUDA_INPUT(spatial_shapes)
    CHECK_CUDA_INPUT(level_start_index)
    CHECK_CUDA_INPUT(sampling_loc)
    CHECK_CUDA_INPUT(attn_weight)
    return ms_deform_attn_cuda_forward(value, spatial_shapes,
                                       level_start_index, sampling_loc,
                                       attn_weight, im2col_step);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
  at::DeviceGuard guard(value.device());
  return ms_deform_attn_impl_forward(value, spatial_shapes, level_start_index,
                                     sampling_loc, attn_weight, im2col_step);
}

void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes,

@@ -55,25 +52,9 @@ void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes,
                             const Tensor &grad_output, Tensor &grad_value,
                             Tensor &grad_sampling_loc,
                             Tensor &grad_attn_weight, const int im2col_step) {
  if (value.type().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(value)
    CHECK_CUDA_INPUT(spatial_shapes)
    CHECK_CUDA_INPUT(level_start_index)
    CHECK_CUDA_INPUT(sampling_loc)
    CHECK_CUDA_INPUT(attn_weight)
    CHECK_CUDA_INPUT(grad_output)
    CHECK_CUDA_INPUT(grad_value)
    CHECK_CUDA_INPUT(grad_sampling_loc)
    CHECK_CUDA_INPUT(grad_attn_weight)
    ms_deform_attn_cuda_backward(value, spatial_shapes, level_start_index,
                                 sampling_loc, attn_weight, grad_output,
                                 grad_value, grad_sampling_loc,
                                 grad_attn_weight, im2col_step);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  } else {
    AT_ERROR("Not implemented on the CPU");
  }
  at::DeviceGuard guard(value.device());
  ms_deform_attn_impl_backward(value, spatial_shapes, level_start_index,
                               sampling_loc, attn_weight, grad_output,
                               grad_value, grad_sampling_loc, grad_attn_weight,
                               im2col_step);
}
mmcv/ops/csrc/parrots/nms.cpp

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

#ifdef MMCV_WITH_CUDA
Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
                             int offset);

Tensor nms_cuda(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  return NMSCUDAKernelLauncher(boxes, scores, iou_threshold, offset);
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  return DISPATCH_DEVICE_IMPL(nms_impl, boxes, scores, iou_threshold, offset);
}
#endif

Tensor nms_cpu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  if (boxes.numel() == 0) {
    return at::empty({0}, boxes.options().dtype(at::kLong));
  }
  auto x1_t = boxes.select(1, 0).contiguous();
  auto y1_t = boxes.select(1, 1).contiguous();
  auto x2_t = boxes.select(1, 2).contiguous();
  auto y2_t = boxes.select(1, 3).contiguous();

  Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset);

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto nboxes = boxes.size(0);
  Tensor select_t = at::ones({nboxes}, boxes.options().dtype(at::kBool));

  auto select = select_t.data_ptr<bool>();
  auto order = order_t.data_ptr<int64_t>();
  auto x1 = x1_t.data_ptr<float>();
  auto y1 = y1_t.data_ptr<float>();
  auto x2 = x2_t.data_ptr<float>();
  auto y2 = y2_t.data_ptr<float>();
  auto areas = areas_t.data_ptr<float>();

  for (int64_t _i = 0; _i < nboxes; _i++) {
    if (select[_i] == false) continue;
    auto i = order[_i];
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    for (int64_t _j = _i + 1; _j < nboxes; _j++) {
      if (select[_j] == false) continue;
      auto j = order[_j];
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(0.f, xx2 - xx1 + offset);
      auto h = std::max(0.f, yy2 - yy1 + offset);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr > iou_threshold) select[_j] = false;
    }
  }
  return order_t.masked_select(select_t);
Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets,
                    float iou_threshold, float sigma, float min_score,
                    int method, int offset) {
  return DISPATCH_DEVICE_IMPL(softnms_impl, boxes, scores, dets, iou_threshold,
                              sigma, min_score, method, offset);
}

Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  if (boxes.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(boxes);
    CHECK_CUDA_INPUT(scores);
    return nms_cuda(boxes, scores, iou_threshold, offset);
#else
    AT_ERROR("nms is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(boxes);
    CHECK_CPU_INPUT(scores);
    return nms_cpu(boxes, scores, iou_threshold, offset);
  }
std::vector<std::vector<int> > nms_match_impl(Tensor dets,
                                              float iou_threshold) {
  return DISPATCH_DEVICE_IMPL(nms_match_impl, dets, iou_threshold);
}

Tensor softnms_cpu(Tensor boxes, Tensor scores, Tensor dets,
                   float iou_threshold, float sigma, float min_score,
                   int method, int offset) {
  if (boxes.numel() == 0) {
    return at::empty({0}, boxes.options().dtype(at::kLong));
  }

  auto x1_t = boxes.select(1, 0).contiguous();
  auto y1_t = boxes.select(1, 1).contiguous();
  auto x2_t = boxes.select(1, 2).contiguous();
  auto y2_t = boxes.select(1, 3).contiguous();
  auto scores_t = scores.clone();

  Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset);

  auto nboxes = boxes.size(0);
  auto x1 = x1_t.data_ptr<float>();
  auto y1 = y1_t.data_ptr<float>();
  auto x2 = x2_t.data_ptr<float>();
  auto y2 = y2_t.data_ptr<float>();
  auto sc = scores_t.data_ptr<float>();
  auto areas = areas_t.data_ptr<float>();
  auto de = dets.data_ptr<float>();

  int64_t pos = 0;
  Tensor inds_t = at::arange(nboxes, boxes.options().dtype(at::kLong));
  auto inds = inds_t.data_ptr<int64_t>();

  for (int64_t i = 0; i < nboxes; i++) {
    auto max_score = sc[i];
    auto max_pos = i;

    pos = i + 1;
    // get max box
    while (pos < nboxes) {
      if (max_score < sc[pos]) {
        max_score = sc[pos];
        max_pos = pos;
      }
      pos = pos + 1;
    }
    // swap
    auto ix1 = de[i * 5 + 0] = x1[max_pos];
    auto iy1 = de[i * 5 + 1] = y1[max_pos];
    auto ix2 = de[i * 5 + 2] = x2[max_pos];
    auto iy2 = de[i * 5 + 3] = y2[max_pos];
    auto iscore = de[i * 5 + 4] = sc[max_pos];
    auto iarea = areas[max_pos];
    auto iind = inds[max_pos];
    x1[max_pos] = x1[i];
    y1[max_pos] = y1[i];
    x2[max_pos] = x2[i];
    y2[max_pos] = y2[i];
    sc[max_pos] = sc[i];
    areas[max_pos] = areas[i];
    inds[max_pos] = inds[i];
    x1[i] = ix1;
    y1[i] = iy1;
    x2[i] = ix2;
    y2[i] = iy2;
    sc[i] = iscore;
    areas[i] = iarea;
    inds[i] = iind;

    pos = i + 1;
    while (pos < nboxes) {
      auto xx1 = std::max(ix1, x1[pos]);
      auto yy1 = std::max(iy1, y1[pos]);
      auto xx2 = std::min(ix2, x2[pos]);
      auto yy2 = std::min(iy2, y2[pos]);

      auto w = std::max(0.f, xx2 - xx1 + offset);
      auto h = std::max(0.f, yy2 - yy1 + offset);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[pos] - inter);

      float weight = 1.;
      if (method == 0) {
        if (ovr >= iou_threshold) weight = 0;
      } else if (method == 1) {
        if (ovr >= iou_threshold) weight = 1 - ovr;
      } else if (method == 2) {
        weight = std::exp(-(ovr * ovr) / sigma);
      }
      sc[pos] *= weight;

      // if box score falls below threshold, discard the box by
      // swapping with last box update N
      if (sc[pos] < min_score) {
        x1[pos] = x1[nboxes - 1];
        y1[pos] = y1[nboxes - 1];
        x2[pos] = x2[nboxes - 1];
        y2[pos] = y2[nboxes - 1];
        sc[pos] = sc[nboxes - 1];
        areas[pos] = areas[nboxes - 1];
        inds[pos] = inds[nboxes - 1];
        nboxes = nboxes - 1;
        pos = pos - 1;
      }
      pos = pos + 1;
    }
  }
  return inds_t.slice(0, 0, nboxes);
Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  return nms_impl(boxes, scores, iou_threshold, offset);
}

Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold,
               float sigma, float min_score, int method, int offset) {
  if (boxes.device().is_cuda()) {
    AT_ERROR("softnms is not implemented on GPU");
  } else {
    return softnms_cpu(boxes, scores, dets, iou_threshold, sigma, min_score,
                       method, offset);
  }
}

std::vector<std::vector<int> > nms_match_cpu(Tensor dets,
                                             float iou_threshold) {
  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();
  auto scores = dets.select(1, 4).contiguous();

  at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t);

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  at::Tensor suppressed_t =
      at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));

  auto suppressed = suppressed_t.data_ptr<uint8_t>();
  auto order = order_t.data_ptr<int64_t>();
  auto x1 = x1_t.data_ptr<float>();
  auto y1 = y1_t.data_ptr<float>();
  auto x2 = x2_t.data_ptr<float>();
  auto y2 = y2_t.data_ptr<float>();
  auto areas = areas_t.data_ptr<float>();

  std::vector<int> keep;
  std::vector<std::vector<int> > matched;

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    if (suppressed[i] == 1) continue;
    keep.push_back(i);
    std::vector<int> v_i;
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1) continue;
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(static_cast<float>(0), xx2 - xx1);
      auto h = std::max(static_cast<float>(0), yy2 - yy1);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr >= iou_threshold) {
        suppressed[j] = 1;
        v_i.push_back(j);
      }
    }
    matched.push_back(v_i);
  }
  for (int i = 0; i < keep.size(); i++)
    matched[i].insert(matched[i].begin(), keep[i]);
  return matched;
  return softnms_impl(boxes, scores, dets, iou_threshold, sigma, min_score,
                      method, offset);
}

std::vector<std::vector<int> > nms_match(Tensor dets, float iou_threshold) {
  if (dets.device().is_cuda()) {
    AT_ERROR("nms_match is not implemented on GPU");
  } else {
    return nms_match_cpu(dets, iou_threshold);
  }
  return nms_match_impl(dets, iou_threshold);
}
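The nms_cpu path above keeps each highest-scoring box and discards any later box whose overlap with it exceeds iou_threshold, using inter / (area_i + area_j - inter) with an optional +offset on widths and heights for pixel-inclusive boxes. A small self-contained check of that overlap arithmetic (toy boxes and a hypothetical helper name, not part of the commit):

// Toy check of the IoU test used by nms_cpu: inter / (area_a + area_b - inter).
#include <algorithm>
#include <cstdio>

float iou(const float a[4], const float b[4], float offset) {
  float xx1 = std::max(a[0], b[0]), yy1 = std::max(a[1], b[1]);
  float xx2 = std::min(a[2], b[2]), yy2 = std::min(a[3], b[3]);
  float w = std::max(0.f, xx2 - xx1 + offset);
  float h = std::max(0.f, yy2 - yy1 + offset);
  float inter = w * h;
  float area_a = (a[2] - a[0] + offset) * (a[3] - a[1] + offset);
  float area_b = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);
  return inter / (area_a + area_b - inter);
}

int main() {
  const float a[4] = {0.f, 0.f, 10.f, 10.f};
  const float b[4] = {5.f, 5.f, 15.f, 15.f};
  float ovr = iou(a, b, 0.f);  // 25 / (100 + 100 - 25), about 0.143
  std::printf("iou=%.3f suppressed=%d\n", ovr, ovr > 0.5f);
}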
mmcv/ops/csrc/parrots/nms_rotated_cpu.cpp  (deleted, 100644 → 0)

// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
#include "box_iou_rotated_utils.hpp"
#include "pytorch_cpp_helper.hpp"

template <typename scalar_t>
Tensor nms_rotated_cpu_kernel(const Tensor dets, const Tensor scores,
                              const float iou_threshold) {
  // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
  // however, the code in this function is much shorter because
  // we delegate the IoU computation for rotated boxes to
  // the single_box_iou_rotated function in box_iou_rotated_utils.h
  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
  AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
  AT_ASSERTM(dets.type() == scores.type(),
             "dets should have the same type as scores");

  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong));
  }

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte));
  Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong));

  auto suppressed = suppressed_t.data_ptr<uint8_t>();
  auto keep = keep_t.data_ptr<int64_t>();
  auto order = order_t.data_ptr<int64_t>();

  int64_t num_to_keep = 0;

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    if (suppressed[i] == 1) {
      continue;
    }

    keep[num_to_keep++] = i;

    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1) {
        continue;
      }

      auto ovr = single_box_iou_rotated<scalar_t>(
          dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>(), 0);
      if (ovr >= iou_threshold) {
        suppressed[j] = 1;
      }
    }
  }
  return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
}

Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores,
                       const float iou_threshold) {
  auto result = at::empty({0}, dets.options());
  AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms_rotated", [&] {
    result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
  });
  return result;
}
mmcv/ops/csrc/parrots/pixel_group.cpp

@@ -2,120 +2,14 @@
// It is modified from https://github.com/WenmuZhou/PAN.pytorch
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

std::vector<std::vector<float>> estimate_confidence(int32_t *label,
                                                    float *score,
                                                    int label_num, int height,
                                                    int width) {
  std::vector<std::vector<float>> point_vector;
  for (int i = 0; i < label_num; i++) {
    std::vector<float> point;
    point.push_back(0);
    point.push_back(0);
    point_vector.push_back(point);
  }
  for (int y = 0; y < height; y++) {
    auto label_tmp = label + y * width;
    auto score_tmp = score + y * width;
    for (int x = 0; x < width; x++) {
      auto l = label_tmp[x];
      if (l > 0) {
        float confidence = score_tmp[x];
        point_vector[l].push_back(x);
        point_vector[l].push_back(y);
        point_vector[l][0] += confidence;
        point_vector[l][1] += 1;
      }
    }
  }
  for (int l = 0; l < point_vector.size(); l++)
    if (point_vector[l][1] > 0) {
      point_vector[l][0] /= point_vector[l][1];
    }
  return point_vector;
}
std::vector<std::vector<float>> pixel_group_cpu(
std::vector<std::vector<float>> pixel_group_impl(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float dis_threshold) {
  assert(score.dim() == 2);
  assert(mask.dim() == 2);
  assert(embedding_dim.dim() == 3);
  int height = score.size(0);
  int width = score.size(1);
  assert(height == mask.size(0) == embedding.size(1) == kernel_label.size(1));
  assert(width == mask.size(1) == embedding.size(2) == kernel_label.size(2));

  auto threshold_square = dis_threshold * dis_threshold;
  auto ptr_score = score.data_ptr<float>();
  auto ptr_mask = mask.data_ptr<bool>();
  auto ptr_kernel_contour = kernel_contour.data_ptr<uint8_t>();
  auto ptr_embedding = embedding.data_ptr<float>();
  auto ptr_kernel_label = kernel_label.data_ptr<int32_t>();
  std::queue<std::tuple<int, int, int32_t>> contour_pixels;
  auto embedding_dim = embedding.size(2);
  std::vector<std::vector<float>> kernel_vector(
      kernel_region_num, std::vector<float>(embedding_dim + 1, 0));

  Tensor text_label;
  text_label = kernel_label.clone();
  auto ptr_text_label = text_label.data_ptr<int32_t>();

  for (int i = 0; i < height; i++) {
    auto ptr_embedding_tmp = ptr_embedding + i * width * embedding_dim;
    auto ptr_kernel_label_tmp = ptr_kernel_label + i * width;
    auto ptr_kernel_contour_tmp = ptr_kernel_contour + i * width;

    for (int j = 0, k = 0; j < width && k < width * embedding_dim;
         j++, k += embedding_dim) {
      int32_t label = ptr_kernel_label_tmp[j];
      if (label > 0) {
        for (int d = 0; d < embedding_dim; d++)
          kernel_vector[label][d] += ptr_embedding_tmp[k + d];
        kernel_vector[label][embedding_dim] += 1;  // kernel pixel number
        if (ptr_kernel_contour_tmp[j]) {
          contour_pixels.push(std::make_tuple(i, j, label));
        }
      }
    }
  }
  for (int i = 0; i < kernel_region_num; i++) {
    for (int j = 0; j < embedding_dim; j++) {
      kernel_vector[i][j] /= kernel_vector[i][embedding_dim];
    }
  }
  int dx[4] = {-1, 1, 0, 0};
  int dy[4] = {0, 0, -1, 1};
  while (!contour_pixels.empty()) {
    auto query_pixel = contour_pixels.front();
    contour_pixels.pop();
    int y = std::get<0>(query_pixel);
    int x = std::get<1>(query_pixel);
    int32_t l = std::get<2>(query_pixel);
    auto kernel_cv = kernel_vector[l];
    for (int idx = 0; idx < 4; idx++) {
      int tmpy = y + dy[idx];
      int tmpx = x + dx[idx];
      auto ptr_text_label_tmp = ptr_text_label + tmpy * width;
      if (tmpy < 0 || tmpy >= height || tmpx < 0 || tmpx >= width) continue;
      if (!ptr_mask[tmpy * width + tmpx] || ptr_text_label_tmp[tmpx] > 0)
        continue;

      float dis = 0;
      auto ptr_embedding_tmp = ptr_embedding + tmpy * width * embedding_dim;
      for (size_t i = 0; i < embedding_dim; i++) {
        dis +=
            pow(kernel_cv[i] - ptr_embedding_tmp[tmpx * embedding_dim + i], 2);
        // ignore further computing if dis is big enough
        if (dis >= threshold_square) break;
      }
      if (dis >= threshold_square) continue;
      contour_pixels.push(std::make_tuple(tmpy, tmpx, l));
      ptr_text_label_tmp[tmpx] = l;
    }
  }
  return estimate_confidence(ptr_text_label, ptr_score, kernel_region_num,
                             height, width);
  return DISPATCH_DEVICE_IMPL(pixel_group_impl, score, mask, embedding,
                              kernel_label, kernel_contour, kernel_region_num,
                              dis_threshold);
}

std::vector<std::vector<float>> pixel_group(

@@ -127,11 +21,6 @@ std::vector<std::vector<float>> pixel_group(
  kernel_label = kernel_label.contiguous();
  kernel_contour = kernel_contour.contiguous();

  CHECK_CPU_INPUT(score);
  CHECK_CPU_INPUT(mask);
  CHECK_CPU_INPUT(embedding);
  CHECK_CPU_INPUT(kernel_label);
  CHECK_CPU_INPUT(kernel_contour);
  return pixel_group_cpu(score, mask, embedding, kernel_label, kernel_contour,
                         kernel_region_num, distance_threshold);
  return pixel_group_impl(score, mask, embedding, kernel_label, kernel_contour,
                          kernel_region_num, distance_threshold);
}
mmcv/ops/csrc/parrots/points_in_boxes.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

#ifdef MMCV_WITH_CUDA
void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                                int pts_num,
                                                const Tensor boxes,
                                                const Tensor pts,
                                                Tensor box_idx_of_points);

void points_in_boxes_part_forward_cuda(int batch_size, int boxes_num,
void points_in_boxes_part_forward_impl(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points) {
  PointsInBoxesPartForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                             boxes, pts, box_idx_of_points);
};

void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                               int pts_num, const Tensor boxes,
                                               const Tensor pts,
                                               Tensor box_idx_of_points);
  DISPATCH_DEVICE_IMPL(points_in_boxes_part_forward_impl, batch_size,
                       boxes_num, pts_num, boxes, pts, box_idx_of_points);
}

void points_in_boxes_all_forward_cuda(int batch_size, int boxes_num,
void points_in_boxes_all_forward_impl(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points) {
  PointsInBoxesAllForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                            boxes, pts, box_idx_of_points);
};
#endif
  DISPATCH_DEVICE_IMPL(points_in_boxes_all_forward_impl, batch_size, boxes_num,
                       pts_num, boxes, pts, box_idx_of_points);
}

void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                  Tensor box_idx_of_points_tensor) {

@@ -34,30 +23,12 @@ void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
  // coordinate, z is the bottom center, each box params pts: (B, npoints, 3)
  // [x, y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints),
  // default -1
  if (pts_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(boxes_tensor);
    CHECK_CUDA_INPUT(pts_tensor);
    CHECK_CUDA_INPUT(box_idx_of_points_tensor);
    int batch_size = boxes_tensor.size(0);
    int boxes_num = boxes_tensor.size(1);
    int pts_num = pts_tensor.size(1);
    const float *boxes = boxes_tensor.data_ptr<float>();
    const float *pts = pts_tensor.data_ptr<float>();
    int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();
    points_in_boxes_part_forward_cuda(batch_size, boxes_num, pts_num,
                                      boxes_tensor, pts_tensor,
                                      box_idx_of_points_tensor);
#else
    AT_ERROR("points_in_boxes_part is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("points_in_boxes_part is not implemented on CPU");
  }
  int batch_size = boxes_tensor.size(0);
  int boxes_num = boxes_tensor.size(1);
  int pts_num = pts_tensor.size(1);
  points_in_boxes_part_forward_impl(batch_size, boxes_num, pts_num,
                                    boxes_tensor, pts_tensor,
                                    box_idx_of_points_tensor);
}

void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,

@@ -65,28 +36,9 @@ void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,
  // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
  // coordinate, z is the bottom center. params pts: (B, npoints, 3) [x, y, z]
  // in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default -1
  if (pts_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(boxes_tensor);
    CHECK_CUDA_INPUT(pts_tensor);
    CHECK_CUDA_INPUT(box_idx_of_points_tensor);
    int batch_size = boxes_tensor.size(0);
    int boxes_num = boxes_tensor.size(1);
    int pts_num = pts_tensor.size(1);
    const float *boxes = boxes_tensor.data_ptr<float>();
    const float *pts = pts_tensor.data_ptr<float>();
    int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();
    points_in_boxes_all_forward_cuda(batch_size, boxes_num, pts_num,
                                     boxes_tensor, pts_tensor,
                                     box_idx_of_points_tensor);
#else
    AT_ERROR("points_in_boxes_all is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("points_in_boxes_all is not implemented on CPU");
  }
  int batch_size = boxes_tensor.size(0);
  int boxes_num = boxes_tensor.size(1);
  int pts_num = pts_tensor.size(1);
  points_in_boxes_all_forward_impl(batch_size, boxes_num, pts_num,
                                   boxes_tensor, pts_tensor,
                                   box_idx_of_points_tensor);
}
mmcv/ops/csrc/parrots/points_in_boxes_cpu.cpp
deleted
100644 → 0
View file @
df465820
#include "pytorch_cpp_helper.hpp"
inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz,
                                      float &local_x, float &local_y) {
  float cosa = cos(-rz), sina = sin(-rz);
  local_x = shift_x * cosa + shift_y * (-sina);
  local_y = shift_x * sina + shift_y * cosa;
}

inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d,
                                 float &local_x, float &local_y) {
  // param pt: (x, y, z)
  // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate,
  // cz in the bottom center
  float x = pt[0], y = pt[1], z = pt[2];
  float cx = box3d[0], cy = box3d[1], cz = box3d[2];
  float x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6];
  cz += z_size / 2.0;  // shift to the center since cz in box3d is the bottom center

  if (fabsf(z - cz) > z_size / 2.0) return 0;
  lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y);
  float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) &
                  (local_y > -y_size / 2.0) & (local_y < y_size / 2.0);
  return in_flag;
}

void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor pts_indices_tensor) {
  // params boxes: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
  // coordinate, z is the bottom center, each box DO NOT overlaps params pts:
  // (npoints, 3) [x, y, z] in LiDAR coordinate params pts_indices: (N, npoints)
  CHECK_CONTIGUOUS(boxes_tensor);
  CHECK_CONTIGUOUS(pts_tensor);
  CHECK_CONTIGUOUS(pts_indices_tensor);

  int boxes_num = boxes_tensor.size(0);
  int pts_num = pts_tensor.size(0);

  const float *boxes = boxes_tensor.data_ptr<float>();
  const float *pts = pts_tensor.data_ptr<float>();
  int *pts_indices = pts_indices_tensor.data_ptr<int>();

  float local_x = 0, local_y = 0;
  for (int i = 0; i < boxes_num; i++) {
    for (int j = 0; j < pts_num; j++) {
      int cur_in_flag =
          check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y);
      pts_indices[i * pts_num + j] = cur_in_flag;
    }
  }
}
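A standalone sketch (not from the repository) of the yaw rotation that lidar_to_local_coords_cpu applies, useful for sanity-checking the box-local test above: a box yawed by roughly 90 degrees maps the world offset (0, 2) onto its local x axis.

#include <cmath>
#include <cstdio>

// Sketch only: replicates the -rz rotation used by lidar_to_local_coords_cpu
// for a single point offset relative to the box center.
int main() {
  float shift_x = 0.0f, shift_y = 2.0f;
  float rz = 1.5707963f;  // box yaw of ~90 degrees
  float cosa = std::cos(-rz), sina = std::sin(-rz);
  float local_x = shift_x * cosa + shift_y * (-sina);
  float local_y = shift_x * sina + shift_y * cosa;
  std::printf("local = (%.3f, %.3f)\n", local_x, local_y);  // ~ (2.000, 0.000)
  return 0;
}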
mmcv/ops/csrc/parrots/points_in_polygons.cpp (new file, 0 → 100644)
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols) {
  DISPATCH_DEVICE_IMPL(points_in_polygons_forward_impl, points, polygons,
                       output, rows, cols);
}

void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output) {
  int rows = points.size(0);
  int cols = polygons.size(0);
  points_in_polygons_forward_impl(points, polygons, output, rows, cols);
}
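The wrapper above only derives rows and cols from the leading dimensions, so the output is naturally a (rows, cols) grid. An illustrative caller follows; the per-point and per-polygon layouts ((N, 2) points, (M, 8) quadrilateral corners) are assumptions, not something stated in this diff:

#include <torch/torch.h>

// Hypothetical usage sketch for the new op; layouts of the last dimension are
// assumed, only the (N, M) output shape follows from the code above.
void example_points_in_polygons(int64_t N, int64_t M) {
  auto points = torch::rand({N, 2}, torch::kCUDA);
  auto polygons = torch::rand({M, 8}, torch::kCUDA);
  auto output = torch::zeros({N, M}, torch::kCUDA);
  points_in_polygons_forward(points, polygons, output);
}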
mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "points_in_polygons_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
void points_in_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  auto points = buildATensor(ctx, ins[0]);
  auto polygons = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  points_in_polygons_forward(points, polygons, output);
}

PARROTS_EXTENSION_REGISTER(points_in_polygons_forward)
    .input(2)
    .output(1)
    .apply(points_in_polygons_cuda_parrots)
    .done();
#endif
mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#ifndef POINTS_IN_POLYGONS_PYTORCH_H
#define POINTS_IN_POLYGONS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output);

#endif  // POINTS_IN_POLYGONS_PYTORCH_H
mmcv/ops/csrc/parrots/psamask.cpp
...
...
@@ -2,255 +2,40 @@
// Modified from
// https://github.com/hszhao/semseg/blob/master/lib/psa/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
void psamask_collect_forward(const int num_, const int h_feature,
                             const int w_feature, const int h_mask,
                             const int w_mask, const int half_h_mask,
                             const int half_w_mask, const Tensor mask_data,
                             Tensor buffer_data) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx, widx) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            buffer_data.view({-1})[(n * h_feature * w_feature +
                                    (hidx + h - half_h_mask) * w_feature +
                                    (widx + w - half_w_mask)) *
                                       h_feature * w_feature +
                                   h * w_feature + w] =
                mask_data.view({-1})[((n * h_mask * w_mask + hidx * w_mask +
                                       widx) *
                                          h_feature +
                                      h) *
                                         w_feature +
                                     w];
          }
        }
      }
    }
  }
}

void psamask_distribute_forward(const int num_, const int h_feature,
                                const int w_feature, const int h_mask,
                                const int w_mask, const int half_h_mask,
                                const int half_w_mask, const Tensor mask_data,
                                Tensor buffer_data) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx, widx) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            buffer_data.view({-1})[(n * h_feature * w_feature +
                                    h * w_feature + w) *
                                       h_feature * w_feature +
                                   (hidx + h - half_h_mask) * w_feature +
                                   (widx + w - half_w_mask)] =
                mask_data.view({-1})[((n * h_mask * w_mask + hidx * w_mask +
                                       widx) *
                                          h_feature +
                                      h) *
                                         w_feature +
                                     w];
          }
        }
      }
    }
  }
}

void psamask_collect_backward(const int num_, const int h_feature,
                              const int w_feature, const int h_mask,
                              const int w_mask, const int half_h_mask,
                              const int half_w_mask, const Tensor buffer_diff,
                              Tensor mask_diff) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx, widx) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) *
                                      h_feature +
                                  h) *
                                     w_feature +
                                 w] =
                buffer_diff.view({-1})[(n * h_feature * w_feature +
                                        (hidx + h - half_h_mask) * w_feature +
                                        (widx + w - half_w_mask)) *
                                           h_feature * w_feature +
                                       h * w_feature + w];
          }
        }
      }
    }
  }
}

void psamask_distribute_backward(const int num_, const int h_feature,
                                 const int w_feature, const int h_mask,
                                 const int w_mask, const int half_h_mask,
                                 const int half_w_mask,
                                 const Tensor buffer_diff, Tensor mask_diff) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx, widx) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) *
                                      h_feature +
                                  h) *
                                     w_feature +
                                 w] =
                buffer_diff.view({-1})[(n * h_feature * w_feature +
                                        h * w_feature + w) *
                                           h_feature * w_feature +
                                       (hidx + h - half_h_mask) * w_feature +
                                       (widx + w - half_w_mask)];
          }
        }
      }
    }
  }
}

void psamask_forward_cpu(const int psa_type, const Tensor input, Tensor output,
                         const int num_, const int h_feature,
                         const int w_feature, const int h_mask,
                         const int w_mask, const int half_h_mask,
                         const int half_w_mask) {
  if (psa_type == 0)
    psamask_collect_forward(num_, h_feature, w_feature, h_mask, w_mask,
                            half_h_mask, half_w_mask, input, output);
  else
    psamask_distribute_forward(num_, h_feature, w_feature, h_mask, w_mask,
                               half_h_mask, half_w_mask, input, output);
}

void psamask_backward_cpu(const int psa_type, const Tensor grad_output,
                          Tensor grad_input, const int num_,
                          const int h_feature, const int w_feature,
                          const int h_mask, const int w_mask,
                          const int half_h_mask, const int half_w_mask) {
  if (psa_type == 0)
    psamask_collect_backward(num_, h_feature, w_feature, h_mask, w_mask,
                             half_h_mask, half_w_mask, grad_output, grad_input);
  else
    psamask_distribute_backward(num_, h_feature, w_feature, h_mask, w_mask,
                                half_h_mask, half_w_mask, grad_output,
                                grad_input);
}

#ifdef MMCV_WITH_CUDA
void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input,
                                      Tensor output, const int num_,
                                      const int h_feature, const int w_feature,
                                      const int h_mask, const int w_mask,
                                      const int half_h_mask,
                                      const int half_w_mask);

void PSAMaskBackwardCUDAKernelLauncher(
    const int psa_type, const Tensor grad_output, Tensor grad_input,
    const int num_, const int h_feature, const int w_feature, const int h_mask,
    const int w_mask, const int half_h_mask, const int half_w_mask);

void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output,
void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask) {
  PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature,
                                   w_feature, h_mask, w_mask, half_h_mask,
                                   half_w_mask);
  DISPATCH_DEVICE_IMPL(psamask_forward_impl, psa_type, input, output, num_,
                       h_feature, w_feature, h_mask, w_mask, half_h_mask,
                       half_w_mask);
}

void psamask_backward_cuda(const int psa_type, const Tensor grad_output,
void psamask_backward_impl(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask) {
  PSAMaskBackwardCUDAKernelLauncher(psa_type, grad_output, grad_input, num_,
                                    h_feature, w_feature, h_mask, w_mask,
                                    half_h_mask, half_w_mask);
  DISPATCH_DEVICE_IMPL(psamask_backward_impl, psa_type, grad_output, grad_input,
                       num_, h_feature, w_feature, h_mask, w_mask, half_h_mask,
                       half_w_mask);
}
#endif

void psamask_forward(const Tensor input, Tensor output, const int psa_type,
                     const int num_, const int h_feature, const int w_feature,
                     const int h_mask, const int w_mask, const int half_h_mask,
                     const int half_w_mask) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(output);
    psamask_forward_cuda(psa_type, input, output, num_, h_feature, w_feature,
                         h_mask, w_mask, half_h_mask, half_w_mask);
#else
    AT_ERROR("PSAMask is not compiled with GPU support");
#endif
  } else {
    psamask_forward_cpu(psa_type, input, output, num_, h_feature, w_feature,
                        h_mask, w_mask, half_h_mask, half_w_mask);
  }
  psamask_forward_impl(psa_type, input, output, num_, h_feature, w_feature,
                       h_mask, w_mask, half_h_mask, half_w_mask);
}

void psamask_backward(Tensor grad_output, const Tensor grad_input,
                      const int psa_type, const int num_, const int h_feature,
                      const int w_feature, const int h_mask, const int w_mask,
                      const int half_h_mask, const int half_w_mask) {
  if (grad_input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(grad_input);
    CHECK_CUDA_INPUT(grad_output);
    psamask_backward_cuda(psa_type, grad_output, grad_input, num_, h_feature,
                          w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
#else
    AT_ERROR("PSAMask is not compiled with GPU support");
#endif
  } else {
    psamask_backward_cpu(psa_type, grad_output, grad_input, num_, h_feature,
                         w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
  }
  psamask_backward_impl(psa_type, grad_output, grad_input, num_, h_feature,
                        w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
}
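For readers checking the CPU loops above against a device kernel, the two flattened indices used by the collect variant can be isolated as below. This is a sketch only, mirroring the arithmetic in psamask_collect_forward; it is not part of the commit.

#include <cstdint>

// Flattened buffer index written by psamask_collect_forward for one
// (n, h, w, hidx, widx) tuple: the mask offset is shifted into feature space.
int64_t collect_buffer_index(int64_t n, int64_t h, int64_t w, int64_t hidx,
                             int64_t widx, int64_t h_feature, int64_t w_feature,
                             int64_t half_h_mask, int64_t half_w_mask) {
  return (n * h_feature * w_feature + (hidx + h - half_h_mask) * w_feature +
          (widx + w - half_w_mask)) *
             h_feature * w_feature +
         h * w_feature + w;
}

// Flattened mask index read for the same tuple.
int64_t collect_mask_index(int64_t n, int64_t h, int64_t w, int64_t hidx,
                           int64_t widx, int64_t h_mask, int64_t w_mask,
                           int64_t h_feature, int64_t w_feature) {
  return ((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) *
             w_feature +
         w;
}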
mmcv/ops/csrc/parrots/riroi_align_rotated.cpp (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_forward_impl, features, rois,
                       output, pooled_height, pooled_width, spatial_scale,
                       num_samples, num_orientations, clockwise);
}

void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, pooled_height, pooled_width, spatial_scale,
                       num_samples, num_orientations, clockwise);
}

void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
                                 int pooled_height, int pooled_width,
                                 float spatial_scale, int num_samples,
                                 int num_orientations, bool clockwise) {
  riroi_align_rotated_forward_impl(features, rois, output, pooled_height,
                                   pooled_width, spatial_scale, num_samples,
                                   num_orientations, clockwise);
}

void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                  Tensor bottom_grad, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int num_samples, int num_orientations,
                                  bool clockwise) {
  riroi_align_rotated_backward_impl(top_grad, rois, bottom_grad, pooled_height,
                                    pooled_width, spatial_scale, num_samples,
                                    num_orientations, clockwise);
}
mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "riroi_align_rotated_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
void riroi_align_rotated_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr,
    const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) {
  int pooled_height;
  int pooled_width;
  float spatial_scale;
  int sample_num;
  int num_orientations;
  bool clockwise;
  SSAttrs(attr)
      .get<int>("pooled_height", pooled_height)
      .get<int>("pooled_width", pooled_width)
      .get<float>("spatial_scale", spatial_scale)
      .get<int>("num_samples", sample_num)
      .get<int>("num_orientations", num_orientations)
      .get<bool>("clockwise", clockwise)
      .done();

  auto input = buildATensor(ctx, ins[0]);
  auto rois = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  riroi_align_rotated_forward(input, rois, output, pooled_height, pooled_width,
                              spatial_scale, sample_num, num_orientations,
                              clockwise);
}

void riroi_align_rotated_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr,
    const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) {
  int pooled_height;
  int pooled_width;
  float spatial_scale;
  int sample_num;
  int num_orientations;
  bool clockwise;
  SSAttrs(attr)
      .get<int>("pooled_height", pooled_height)
      .get<int>("pooled_width", pooled_width)
      .get<float>("spatial_scale", spatial_scale)
      .get<int>("num_samples", sample_num)
      .get<int>("num_orientations", num_orientations)
      .get<bool>("clockwise", clockwise)
      .done();

  auto grad_output = buildATensor(ctx, ins[0]);
  auto rois = buildATensor(ctx, ins[1]);
  auto grad_input = buildATensor(ctx, outs[0]);
  riroi_align_rotated_backward(grad_output, rois, grad_input, pooled_height,
                               pooled_width, spatial_scale, sample_num,
                               num_orientations, clockwise);
}

PARROTS_EXTENSION_REGISTER(riroi_align_rotated_forward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("num_samples")
    .attr("num_orientations")
    .attr("clockwise")
    .input(2)
    .output(1)
    .apply(riroi_align_rotated_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(riroi_align_rotated_backward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("num_samples")
    .attr("num_orientations")
    .attr("clockwise")
    .input(2)
    .output(1)
    .apply(riroi_align_rotated_backward_cuda_parrots)
    .done();
#endif
mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#ifndef RIROI_ALIGN_ROTATED_PYTORCH_H
#define RIROI_ALIGN_ROTATED_PYTORCH_H
#include <torch/extension.h>
using namespace at;

void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
                                 int pooled_height, int pooled_width,
                                 float spatial_scale, int num_samples,
                                 int num_orientations, bool clockwise);

void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                  Tensor bottom_grad, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int num_samples, int num_orientations,
                                  bool clockwise);

#endif  // RIROI_ALIGN_ROTATED_PYTORCH_H
mmcv/ops/csrc/parrots/roi_align.cpp
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                       Tensor argmax_y, Tensor argmax_x,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       int pool_mode, bool aligned);

void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                        Tensor argmax_y, Tensor argmax_x,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, int pool_mode,
                                        bool aligned);

void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignForwardCUDAKernelLauncher(input, rois, output, argmax_y, argmax_x,
                                    aligned_height, aligned_width,
                                    spatial_scale, sampling_ratio, pool_mode,
                                    aligned);
  DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y,
                       argmax_x, aligned_height, aligned_width, spatial_scale,
                       sampling_ratio, pool_mode, aligned);
}

void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y,
void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  ROIAlignBackwardCUDAKernelLauncher(grad_output, rois, argmax_y, argmax_x,
                                     grad_input, aligned_height, aligned_width,
                                     spatial_scale, sampling_ratio, pool_mode,
                                     aligned);
}
#endif

void ROIAlignForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
                                Tensor argmax_y, Tensor argmax_x,
                                int aligned_height, int aligned_width,
                                float spatial_scale, int sampling_ratio,
                                int pool_mode, bool aligned);

void ROIAlignBackwardCPULauncher(Tensor grad_output, Tensor rois,
                                 Tensor argmax_y, Tensor argmax_x,
                                 Tensor grad_input, int aligned_height,
                                 int aligned_width, float spatial_scale,
                                 int sampling_ratio, int pool_mode,
                                 bool aligned);

void roi_align_forward_cpu(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax_y, Tensor argmax_x,
                           int aligned_height, int aligned_width,
                           float spatial_scale, int sampling_ratio,
                           int pool_mode, bool aligned) {
  ROIAlignForwardCPULauncher(input, rois, output, argmax_y, argmax_x,
                             aligned_height, aligned_width, spatial_scale,
                             sampling_ratio, pool_mode, aligned);
}

void roi_align_backward_cpu(Tensor grad_output, Tensor rois, Tensor argmax_y,
                            Tensor argmax_x, Tensor grad_input,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignBackwardCPULauncher(grad_output, rois, argmax_y, argmax_x, grad_input,
                              aligned_height, aligned_width, spatial_scale,
                              sampling_ratio, pool_mode, aligned);
  DISPATCH_DEVICE_IMPL(roi_align_backward_impl, grad_output, rois, argmax_y,
                       argmax_x, grad_input, aligned_height, aligned_width,
                       spatial_scale, sampling_ratio, pool_mode, aligned);
}

void roi_align_forward(Tensor input, Tensor rois, Tensor output,
                       Tensor argmax_y, Tensor argmax_x, int aligned_height,
                       int aligned_width, float spatial_scale,
                       int sampling_ratio, int pool_mode, bool aligned) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(rois);
    CHECK_CUDA_INPUT(output);
    CHECK_CUDA_INPUT(argmax_y);
    CHECK_CUDA_INPUT(argmax_x);
    roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x,
                           aligned_height, aligned_width, spatial_scale,
                           sampling_ratio, pool_mode, aligned);
#else
    AT_ERROR("RoIAlign is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(rois);
    CHECK_CPU_INPUT(output);
    CHECK_CPU_INPUT(argmax_y);
    CHECK_CPU_INPUT(argmax_x);
    roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
  }
  roi_align_forward_impl(input, rois, output, argmax_y, argmax_x,
                         aligned_height, aligned_width, spatial_scale,
                         sampling_ratio, pool_mode, aligned);
}

void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
                        Tensor argmax_x, Tensor grad_input, int aligned_height,
                        int aligned_width, float spatial_scale,
                        int sampling_ratio, int pool_mode, bool aligned) {
  if (grad_output.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(grad_output);
    CHECK_CUDA_INPUT(rois);
    CHECK_CUDA_INPUT(argmax_y);
    CHECK_CUDA_INPUT(argmax_x);
    CHECK_CUDA_INPUT(grad_input);
    roi_align_backward_cuda(grad_output, rois, argmax_y, argmax_x, grad_input,
                            aligned_height, aligned_width, spatial_scale,
                            sampling_ratio, pool_mode, aligned);
#else
    AT_ERROR("RoIAlign is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(grad_output);
    CHECK_CPU_INPUT(rois);
    CHECK_CPU_INPUT(argmax_y);
    CHECK_CPU_INPUT(argmax_x);
    CHECK_CPU_INPUT(grad_input);
    roi_align_backward_cpu(grad_output, rois, argmax_y, argmax_x, grad_input,
                           aligned_height, aligned_width, spatial_scale,
                           sampling_ratio, pool_mode, aligned);
  }
  roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
}
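The _impl functions above only forward to DISPATCH_DEVICE_IMPL; a device backend is expected to register its launcher against the same symbol elsewhere. The sketch below shows how that wiring typically looks, assuming the REGISTER_DEVICE_IMPL macro from pytorch_device_registry.hpp; the registration itself is not part of this file and the exact placement is an assumption.

// Sketch only: attach a CUDA launcher to the dispatcher used above.
void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignForwardCUDAKernelLauncher(input, rois, output, argmax_y, argmax_x,
                                    aligned_height, aligned_width,
                                    spatial_scale, sampling_ratio, pool_mode,
                                    aligned);
}
// Registration against the dispatch key; assumed macro from the registry header.
REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda);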
mmcv/ops/csrc/parrots/roi_align_cpu.cpp (deleted, 100644 → 0)
// Modified from
// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlign
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include "../pytorch_cpp_helper.hpp"
// implementation taken from Caffe2
template
<
typename
T
>
struct
PreCalc
{
int
pos1
;
int
pos2
;
int
pos3
;
int
pos4
;
T
w1
;
T
w2
;
T
w3
;
T
w4
;
};
template
<
typename
T
>
void
pre_calc_for_bilinear_interpolate
(
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
iy_upper
,
const
int
ix_upper
,
T
roi_start_h
,
T
roi_start_w
,
T
bin_size_h
,
T
bin_size_w
,
int
roi_bin_grid_h
,
int
roi_bin_grid_w
,
std
::
vector
<
PreCalc
<
T
>>&
pre_calc
)
{
int
pre_calc_index
=
0
;
for
(
int
ph
=
0
;
ph
<
pooled_height
;
ph
++
)
{
for
(
int
pw
=
0
;
pw
<
pooled_width
;
pw
++
)
{
for
(
int
iy
=
0
;
iy
<
iy_upper
;
iy
++
)
{
const
T
yy
=
roi_start_h
+
ph
*
bin_size_h
+
static_cast
<
T
>
(
iy
+
.5
f
)
*
bin_size_h
/
static_cast
<
T
>
(
roi_bin_grid_h
);
// e.g., 0.5, 1.5
for
(
int
ix
=
0
;
ix
<
ix_upper
;
ix
++
)
{
const
T
xx
=
roi_start_w
+
pw
*
bin_size_w
+
static_cast
<
T
>
(
ix
+
.5
f
)
*
bin_size_w
/
static_cast
<
T
>
(
roi_bin_grid_w
);
T
x
=
xx
;
T
y
=
yy
;
// deal with: inverse elements are out of feature map boundary
if
(
y
<
-
1.0
||
y
>
height
||
x
<
-
1.0
||
x
>
width
)
{
// empty
PreCalc
<
T
>
pc
;
pc
.
pos1
=
0
;
pc
.
pos2
=
0
;
pc
.
pos3
=
0
;
pc
.
pos4
=
0
;
pc
.
w1
=
0
;
pc
.
w2
=
0
;
pc
.
w3
=
0
;
pc
.
w4
=
0
;
pre_calc
[
pre_calc_index
]
=
pc
;
pre_calc_index
+=
1
;
continue
;
}
if
(
y
<=
0
)
{
y
=
0
;
}
if
(
x
<=
0
)
{
x
=
0
;
}
int
y_low
=
(
int
)
y
;
int
x_low
=
(
int
)
x
;
int
y_high
;
int
x_high
;
if
(
y_low
>=
height
-
1
)
{
y_high
=
y_low
=
height
-
1
;
y
=
(
T
)
y_low
;
}
else
{
y_high
=
y_low
+
1
;
}
if
(
x_low
>=
width
-
1
)
{
x_high
=
x_low
=
width
-
1
;
x
=
(
T
)
x_low
;
}
else
{
x_high
=
x_low
+
1
;
}
T
ly
=
y
-
y_low
;
T
lx
=
x
-
x_low
;
T
hy
=
1.
-
ly
,
hx
=
1.
-
lx
;
T
w1
=
hy
*
hx
,
w2
=
hy
*
lx
,
w3
=
ly
*
hx
,
w4
=
ly
*
lx
;
// save weights and indices
PreCalc
<
T
>
pc
;
pc
.
pos1
=
y_low
*
width
+
x_low
;
pc
.
pos2
=
y_low
*
width
+
x_high
;
pc
.
pos3
=
y_high
*
width
+
x_low
;
pc
.
pos4
=
y_high
*
width
+
x_high
;
pc
.
w1
=
w1
;
pc
.
w2
=
w2
;
pc
.
w3
=
w3
;
pc
.
w4
=
w4
;
pre_calc
[
pre_calc_index
]
=
pc
;
pre_calc_index
+=
1
;
}
}
}
}
}
template
<
typename
T
>
void
ROIAlignForward
(
const
int
nthreads
,
const
T
*
input
,
const
T
*
rois
,
T
*
output
,
T
*
argmax_y
,
T
*
argmax_x
,
const
int
pooled_height
,
const
int
pooled_width
,
const
T
spatial_scale
,
const
int
sampling_ratio
,
const
int
pool_mode
,
// 0 - max pool, 1 - avg pool
const
bool
aligned
,
const
int
channels
,
const
int
height
,
const
int
width
)
{
int
n_rois
=
nthreads
/
channels
/
pooled_width
/
pooled_height
;
// (n, c, ph, pw) is an element in the pooled output
// can be parallelized using omp
// #pragma omp parallel for num_threads(32)
for
(
int
n
=
0
;
n
<
n_rois
;
n
++
)
{
int
index_n
=
n
*
channels
*
pooled_width
*
pooled_height
;
const
T
*
offset_rois
=
rois
+
n
*
5
;
int
roi_batch_ind
=
offset_rois
[
0
];
// Do not use rounding; this implementation detail is critical
T
offset
=
aligned
?
(
T
)
0.5
:
(
T
)
0.0
;
T
roi_start_w
=
offset_rois
[
1
]
*
spatial_scale
-
offset
;
T
roi_start_h
=
offset_rois
[
2
]
*
spatial_scale
-
offset
;
T
roi_end_w
=
offset_rois
[
3
]
*
spatial_scale
-
offset
;
T
roi_end_h
=
offset_rois
[
4
]
*
spatial_scale
-
offset
;
T
roi_width
=
roi_end_w
-
roi_start_w
;
T
roi_height
=
roi_end_h
-
roi_start_h
;
if
(
aligned
)
{
AT_ASSERTM
(
roi_width
>=
0
&&
roi_height
>=
0
,
"ROIs in ROIAlign cannot have non-negative size!"
);
}
else
{
// for backward-compatibility only
roi_width
=
std
::
max
(
roi_width
,
(
T
)
1.
);
roi_height
=
std
::
max
(
roi_height
,
(
T
)
1.
);
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
// We use roi_bin_grid to sample the grid and mimic integral
int
roi_bin_grid_h
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_height
/
pooled_height
);
// e.g., = 2
int
roi_bin_grid_w
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_width
/
pooled_width
);
// When the grid is empty, output zeros == 0/1, instead of NaN.
const
T
count
=
std
::
max
(
roi_bin_grid_h
*
roi_bin_grid_w
,
1
);
// e.g. = 4
// we want to precalculate indices and weights shared by all channels,
// this is the key point of optimization
std
::
vector
<
PreCalc
<
T
>>
pre_calc
(
roi_bin_grid_h
*
roi_bin_grid_w
*
pooled_width
*
pooled_height
);
pre_calc_for_bilinear_interpolate
(
height
,
width
,
pooled_height
,
pooled_width
,
roi_bin_grid_h
,
roi_bin_grid_w
,
roi_start_h
,
roi_start_w
,
bin_size_h
,
bin_size_w
,
roi_bin_grid_h
,
roi_bin_grid_w
,
pre_calc
);
for
(
int
c
=
0
;
c
<
channels
;
c
++
)
{
int
index_n_c
=
index_n
+
c
*
pooled_width
*
pooled_height
;
const
T
*
offset_input
=
input
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
int
pre_calc_index
=
0
;
for
(
int
ph
=
0
;
ph
<
pooled_height
;
ph
++
)
{
for
(
int
pw
=
0
;
pw
<
pooled_width
;
pw
++
)
{
int
index
=
index_n_c
+
ph
*
pooled_width
+
pw
;
T
output_val
=
0.
;
T
maxval
=
-
10000
;
T
maxidx_y
=
-
1.
f
,
maxidx_x
=
-
1.
f
;
for
(
int
iy
=
0
;
iy
<
roi_bin_grid_h
;
iy
++
)
{
const
T
y
=
roi_start_h
+
ph
*
bin_size_h
+
static_cast
<
T
>
(
iy
+
.5
f
)
*
bin_size_h
/
static_cast
<
T
>
(
roi_bin_grid_h
);
for
(
int
ix
=
0
;
ix
<
roi_bin_grid_w
;
ix
++
)
{
const
T
x
=
roi_start_w
+
pw
*
bin_size_w
+
static_cast
<
T
>
(
ix
+
.5
f
)
*
bin_size_w
/
static_cast
<
T
>
(
roi_bin_grid_w
);
PreCalc
<
T
>
pc
=
pre_calc
[
pre_calc_index
];
T
val
=
pc
.
w1
*
offset_input
[
pc
.
pos1
]
+
pc
.
w2
*
offset_input
[
pc
.
pos2
]
+
pc
.
w3
*
offset_input
[
pc
.
pos3
]
+
pc
.
w4
*
offset_input
[
pc
.
pos4
];
if
(
val
>
maxval
)
{
maxval
=
val
;
maxidx_y
=
y
;
maxidx_x
=
x
;
}
output_val
+=
val
;
pre_calc_index
+=
1
;
}
}
if
(
pool_mode
==
0
)
{
// We do max pooling inside a bin
output
[
index
]
=
maxval
;
argmax_y
[
index
]
=
maxidx_y
;
argmax_x
[
index
]
=
maxidx_x
;
}
else
if
(
pool_mode
==
1
)
{
// We do average (integral) pooling inside a bin
output
[
index
]
=
output_val
/
count
;
}
// if
}
// for pw
}
// for ph
}
// for c
}
// for n
}
template
<
typename
T
>
void
bilinear_interpolate_gradient
(
const
int
height
,
const
int
width
,
T
y
,
T
x
,
T
&
w1
,
T
&
w2
,
T
&
w3
,
T
&
w4
,
int
&
x_low
,
int
&
x_high
,
int
&
y_low
,
int
&
y_high
,
const
int
index
/* index for debug only*/
)
{
// deal with cases that inverse elements are out of feature map boundary
if
(
y
<
-
1.0
||
y
>
height
||
x
<
-
1.0
||
x
>
width
)
{
// empty
w1
=
w2
=
w3
=
w4
=
0.
;
x_low
=
x_high
=
y_low
=
y_high
=
-
1
;
return
;
}
if
(
y
<=
0
)
y
=
0
;
if
(
x
<=
0
)
x
=
0
;
y_low
=
(
int
)
y
;
x_low
=
(
int
)
x
;
if
(
y_low
>=
height
-
1
)
{
y_high
=
y_low
=
height
-
1
;
y
=
(
T
)
y_low
;
}
else
{
y_high
=
y_low
+
1
;
}
if
(
x_low
>=
width
-
1
)
{
x_high
=
x_low
=
width
-
1
;
x
=
(
T
)
x_low
;
}
else
{
x_high
=
x_low
+
1
;
}
T
ly
=
y
-
y_low
;
T
lx
=
x
-
x_low
;
T
hy
=
1.
-
ly
,
hx
=
1.
-
lx
;
// reference in forward
// T v1 = input[y_low * width + x_low];
// T v2 = input[y_low * width + x_high];
// T v3 = input[y_high * width + x_low];
// T v4 = input[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
w1
=
hy
*
hx
,
w2
=
hy
*
lx
,
w3
=
ly
*
hx
,
w4
=
ly
*
lx
;
return
;
}
template
<
class
T
>
inline
void
add
(
T
*
address
,
const
T
&
val
)
{
*
address
+=
val
;
}
template
<
typename
T
>
void
ROIAlignBackward
(
const
int
nthreads
,
const
T
*
grad_output
,
const
T
*
rois
,
const
T
*
argmax_y
,
const
T
*
argmax_x
,
T
*
grad_input
,
const
int
pooled_height
,
const
int
pooled_width
,
const
T
spatial_scale
,
const
int
sampling_ratio
,
const
int
pool_mode
,
// 0 - max pool, 1 - avg pool
const
bool
aligned
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
n_stride
,
const
int
c_stride
,
const
int
h_stride
,
const
int
w_stride
)
{
for
(
int
index
=
0
;
index
<
nthreads
;
index
++
)
{
// (n, c, ph, pw) is an element in the pooled output
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
T
*
offset_rois
=
rois
+
n
*
5
;
int
roi_batch_ind
=
offset_rois
[
0
];
// Do not use rounding; this implementation detail is critical
T
offset
=
aligned
?
(
T
)
0.5
:
(
T
)
0.0
;
T
roi_start_w
=
offset_rois
[
1
]
*
spatial_scale
-
offset
;
T
roi_start_h
=
offset_rois
[
2
]
*
spatial_scale
-
offset
;
T
roi_end_w
=
offset_rois
[
3
]
*
spatial_scale
-
offset
;
T
roi_end_h
=
offset_rois
[
4
]
*
spatial_scale
-
offset
;
T
roi_width
=
roi_end_w
-
roi_start_w
;
T
roi_height
=
roi_end_h
-
roi_start_h
;
if
(
aligned
)
{
AT_ASSERTM
(
roi_width
>=
0
&&
roi_height
>=
0
,
"ROIs in ROIAlign do not have non-negative size!"
);
}
else
{
// for backward-compatibility only
roi_width
=
std
::
max
(
roi_width
,
(
T
)
1.
);
roi_height
=
std
::
max
(
roi_height
,
(
T
)
1.
);
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
T
*
offset_grad_input
=
grad_input
+
((
roi_batch_ind
*
channels
+
c
)
*
height
*
width
);
int
output_offset
=
n
*
n_stride
+
c
*
c_stride
;
const
T
*
offset_grad_output
=
grad_output
+
output_offset
;
const
T
grad_output_this_bin
=
offset_grad_output
[
ph
*
h_stride
+
pw
*
w_stride
];
if
(
pool_mode
==
0
)
{
// We do max pooling inside a bin
T
y
=
argmax_y
[
index
],
x
=
argmax_x
[
index
];
if
(
y
!=
-
1.
f
)
{
T
w1
,
w2
,
w3
,
w4
;
int
x_low
,
x_high
,
y_low
,
y_high
;
bilinear_interpolate_gradient
(
height
,
width
,
y
,
x
,
w1
,
w2
,
w3
,
w4
,
x_low
,
x_high
,
y_low
,
y_high
,
index
);
T
g1
=
grad_output_this_bin
*
w1
;
T
g2
=
grad_output_this_bin
*
w2
;
T
g3
=
grad_output_this_bin
*
w3
;
T
g4
=
grad_output_this_bin
*
w4
;
if
(
x_low
>=
0
&&
x_high
>=
0
&&
y_low
>=
0
&&
y_high
>=
0
)
{
// atomic add is not needed for now since it is single threaded
add
(
offset_grad_input
+
y_low
*
width
+
x_low
,
static_cast
<
T
>
(
g1
));
add
(
offset_grad_input
+
y_low
*
width
+
x_high
,
static_cast
<
T
>
(
g2
));
add
(
offset_grad_input
+
y_high
*
width
+
x_low
,
static_cast
<
T
>
(
g3
));
add
(
offset_grad_input
+
y_high
*
width
+
x_high
,
static_cast
<
T
>
(
g4
));
}
// if
}
// mode
}
else
if
(
pool_mode
==
1
)
{
// We do average (integral) pooling inside a bin
// We use roi_bin_grid to sample the grid and mimic integral
int
roi_bin_grid_h
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_height
/
pooled_height
);
// e.g., = 2
int
roi_bin_grid_w
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_width
/
pooled_width
);
const
T
count
=
roi_bin_grid_h
*
roi_bin_grid_w
;
// e.g. = 4
for
(
int
iy
=
0
;
iy
<
roi_bin_grid_h
;
iy
++
)
{
const
T
y
=
roi_start_h
+
ph
*
bin_size_h
+
static_cast
<
T
>
(
iy
+
.5
f
)
*
bin_size_h
/
static_cast
<
T
>
(
roi_bin_grid_h
);
// e.g., 0.5, 1.5
for
(
int
ix
=
0
;
ix
<
roi_bin_grid_w
;
ix
++
)
{
const
T
x
=
roi_start_w
+
pw
*
bin_size_w
+
static_cast
<
T
>
(
ix
+
.5
f
)
*
bin_size_w
/
static_cast
<
T
>
(
roi_bin_grid_w
);
T
w1
,
w2
,
w3
,
w4
;
int
x_low
,
x_high
,
y_low
,
y_high
;
bilinear_interpolate_gradient
(
height
,
width
,
y
,
x
,
w1
,
w2
,
w3
,
w4
,
x_low
,
x_high
,
y_low
,
y_high
,
index
);
T
g1
=
grad_output_this_bin
*
w1
/
count
;
T
g2
=
grad_output_this_bin
*
w2
/
count
;
T
g3
=
grad_output_this_bin
*
w3
/
count
;
T
g4
=
grad_output_this_bin
*
w4
/
count
;
if
(
x_low
>=
0
&&
x_high
>=
0
&&
y_low
>=
0
&&
y_high
>=
0
)
{
// atomic add is not needed for now since it is single threaded
add
(
offset_grad_input
+
y_low
*
width
+
x_low
,
static_cast
<
T
>
(
g1
));
add
(
offset_grad_input
+
y_low
*
width
+
x_high
,
static_cast
<
T
>
(
g2
));
add
(
offset_grad_input
+
y_high
*
width
+
x_low
,
static_cast
<
T
>
(
g3
));
add
(
offset_grad_input
+
y_high
*
width
+
x_high
,
static_cast
<
T
>
(
g4
));
}
// if
}
// ix
}
// iy
}
// mode
}
// for
}
// ROIAlignBackward
void
ROIAlignForwardCPULauncher
(
Tensor
input
,
Tensor
rois
,
Tensor
output
,
Tensor
argmax_y
,
Tensor
argmax_x
,
int
aligned_height
,
int
aligned_width
,
float
spatial_scale
,
int
sampling_ratio
,
int
pool_mode
,
bool
aligned
)
{
int
output_size
=
output
.
numel
();
int
channels
=
input
.
size
(
1
);
int
height
=
input
.
size
(
2
);
int
width
=
input
.
size
(
3
);
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
input
.
scalar_type
(),
"ROIAlign_forward"
,
[
&
]
{
ROIAlignForward
<
scalar_t
>
(
output_size
,
input
.
data_ptr
<
scalar_t
>
(),
rois
.
data_ptr
<
scalar_t
>
(),
output
.
data_ptr
<
scalar_t
>
(),
argmax_y
.
data_ptr
<
scalar_t
>
(),
argmax_x
.
data_ptr
<
scalar_t
>
(),
aligned_height
,
aligned_width
,
static_cast
<
scalar_t
>
(
spatial_scale
),
sampling_ratio
,
pool_mode
,
aligned
,
channels
,
height
,
width
);
});
}
void
ROIAlignBackwardCPULauncher
(
Tensor
grad_output
,
Tensor
rois
,
Tensor
argmax_y
,
Tensor
argmax_x
,
Tensor
grad_input
,
int
aligned_height
,
int
aligned_width
,
float
spatial_scale
,
int
sampling_ratio
,
int
pool_mode
,
bool
aligned
)
{
int
output_size
=
grad_output
.
numel
();
int
channels
=
grad_input
.
size
(
1
);
int
height
=
grad_input
.
size
(
2
);
int
width
=
grad_input
.
size
(
3
);
// get stride values to ensure indexing into gradients is correct.
int
n_stride
=
grad_output
.
stride
(
0
);
int
c_stride
=
grad_output
.
stride
(
1
);
int
h_stride
=
grad_output
.
stride
(
2
);
int
w_stride
=
grad_output
.
stride
(
3
);
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
grad_output
.
scalar_type
(),
"ROIAlign_backward"
,
[
&
]
{
ROIAlignBackward
<
scalar_t
>
(
output_size
,
grad_output
.
data_ptr
<
scalar_t
>
(),
rois
.
data_ptr
<
scalar_t
>
(),
argmax_y
.
data_ptr
<
scalar_t
>
(),
argmax_x
.
data_ptr
<
scalar_t
>
(),
grad_input
.
data_ptr
<
scalar_t
>
(),
aligned_height
,
aligned_width
,
static_cast
<
scalar_t
>
(
spatial_scale
),
sampling_ratio
,
pool_mode
,
aligned
,
channels
,
height
,
width
,
n_stride
,
c_stride
,
h_stride
,
w_stride
);
});
}
mmcv/ops/csrc/parrots/roi_align_rotated.cpp
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void ROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor output);

void ROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor bottom_grad);

void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output,
                                    int pooled_height, int pooled_width,
                                    float spatial_scale, int sample_num,
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  int num_channels = features.size(1);
  int data_height = features.size(2);
  int data_width = features.size(3);
  ROIAlignRotatedForwardCUDAKernelLauncher(
      features, rois, spatial_scale, sample_num, aligned, clockwise,
      num_channels, data_height, data_width, num_rois, pooled_height,
      pooled_width, output);
  DISPATCH_DEVICE_IMPL(roi_align_rotated_forward_impl, features, rois, output,
                       aligned_height, aligned_width, spatial_scale,
                       sample_ratio, aligned, clockwise);
}

void roi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int pooled_height,
                                     int pooled_width, float spatial_scale,
                                     int sample_num, bool aligned,
void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  int num_channels = bottom_grad.size(1);
  int data_height = bottom_grad.size(2);
  int data_width = bottom_grad.size(3);
  ROIAlignRotatedBackwardCUDAKernelLauncher(
      top_grad, rois, spatial_scale, sample_num, aligned, clockwise,
      num_channels, data_height, data_width, num_rois, pooled_height,
      pooled_width, bottom_grad);
}
#endif

void ROIAlignRotatedForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       bool aligned, bool clockwise);

void ROIAlignRotatedBackwardCPULauncher(Tensor grad_output, Tensor rois,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, bool aligned,
                                        bool clockwise);

void roi_align_rotated_forward_cpu(Tensor features, Tensor rois, Tensor output,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sample_num,
                                   bool aligned, bool clockwise) {
  ROIAlignRotatedForwardCPULauncher(features, rois, output, pooled_height,
                                    pooled_width, spatial_scale, sample_num,
                                    aligned, clockwise);
}

void roi_align_rotated_backward_cpu(Tensor features, Tensor rois, Tensor output,
                                    int pooled_height, int pooled_width,
                                    float spatial_scale, int sample_num,
                                    bool aligned, bool clockwise) {
  ROIAlignRotatedBackwardCPULauncher(features, rois, output, pooled_height,
                                     pooled_width, spatial_scale, sample_num,
                                     aligned, clockwise);
  DISPATCH_DEVICE_IMPL(roi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, aligned_height, aligned_width,
                       spatial_scale, sample_ratio, aligned, clockwise);
}

void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output,
                               int pooled_height, int pooled_width,
                               float spatial_scale, int sample_num,
                               int aligned_height, int aligned_width,
                               float spatial_scale, int sampling_ratio,
                               bool aligned, bool clockwise) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(rois);
    CHECK_CUDA_INPUT(output);
    roi_align_rotated_forward_cuda(input, rois, output, pooled_height,
                                   pooled_width, spatial_scale, sample_num,
                                   aligned, clockwise);
#else
    AT_ERROR("RoIAlignRotated is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(rois);
    CHECK_CPU_INPUT(output);
    roi_align_rotated_forward_cpu(input, rois, output, pooled_height,
                                  pooled_width, spatial_scale, sample_num,
                                  aligned, clockwise);
  }
  roi_align_rotated_forward_impl(input, rois, output, aligned_height,
                                 aligned_width, spatial_scale, sampling_ratio,
                                 aligned, clockwise);
}

void roi_align_rotated_backward(Tensor grad_output, Tensor rois,
                                Tensor grad_input, int pooled_height,
                                int pooled_width, float spatial_scale,
                                int sample_num, bool aligned, bool clockwise) {
  if (grad_output.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(grad_output);
    CHECK_CUDA_INPUT(rois);
    CHECK_CUDA_INPUT(grad_input);
    roi_align_rotated_backward_cuda(grad_output, rois, grad_input,
                                    pooled_height, pooled_width, spatial_scale,
                                    sample_num, aligned, clockwise);
#else
    AT_ERROR("RoIAlignRotated is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(grad_output);
    CHECK_CPU_INPUT(rois);
    CHECK_CPU_INPUT(grad_input);
    roi_align_rotated_backward_cpu(grad_output, rois, grad_input, pooled_height,
                                   pooled_width, spatial_scale, sample_num,
                                   aligned, clockwise);
  }
void roi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                Tensor bottom_grad, int aligned_height,
                                int aligned_width, float spatial_scale,
                                int sampling_ratio, bool aligned,
                                bool clockwise) {
  roi_align_rotated_backward_impl(top_grad, rois, bottom_grad, aligned_height,
                                  aligned_width, spatial_scale, sampling_ratio,
                                  aligned, clockwise);
}
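The size check above pins the RoI layout to six columns per row; the deleted CPU kernel further down this page reads them as (batch_ind, center_x, center_y, width, height, theta). A small illustrative construction, not part of the commit and with arbitrary values:

#include <torch/torch.h>

// Sketch only: one rotated RoI in the 6-column layout enforced by the
// size_rois != 6 check (batch index, cx, cy, w, h, theta in radians).
void example_rotated_roi() {
  auto rois = torch::tensor({0.0f, 32.0f, 32.0f, 16.0f, 8.0f, 0.3f})
                  .reshape({1, 6});
  // Pass rois together with a feature map and an output tensor of shape
  // (1, C, pooled_h, pooled_w) to roi_align_rotated_forward.
}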
mmcv/ops/csrc/parrots/roi_align_rotated_cpu.cpp (deleted, 100644 → 0)
// Modified from
// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include "../pytorch_cpp_helper.hpp"
// implementation taken from Caffe2
template
<
typename
T
>
struct
PreCalc
{
int
pos1
;
int
pos2
;
int
pos3
;
int
pos4
;
T
w1
;
T
w2
;
T
w3
;
T
w4
;
};
template
<
typename
T
>
void
pre_calc_for_bilinear_interpolate
(
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
iy_upper
,
const
int
ix_upper
,
T
roi_start_h
,
T
roi_start_w
,
T
bin_size_h
,
T
bin_size_w
,
int
roi_bin_grid_h
,
int
roi_bin_grid_w
,
T
roi_center_h
,
T
roi_center_w
,
T
cos_theta
,
T
sin_theta
,
std
::
vector
<
PreCalc
<
T
>>&
pre_calc
)
{
int
pre_calc_index
=
0
;
for
(
int
ph
=
0
;
ph
<
pooled_height
;
ph
++
)
{
for
(
int
pw
=
0
;
pw
<
pooled_width
;
pw
++
)
{
for
(
int
iy
=
0
;
iy
<
iy_upper
;
iy
++
)
{
const
T
yy
=
roi_start_h
+
ph
*
bin_size_h
+
static_cast
<
T
>
(
iy
+
.5
f
)
*
bin_size_h
/
static_cast
<
T
>
(
roi_bin_grid_h
);
// e.g., 0.5, 1.5
for
(
int
ix
=
0
;
ix
<
ix_upper
;
ix
++
)
{
const
T
xx
=
roi_start_w
+
pw
*
bin_size_w
+
static_cast
<
T
>
(
ix
+
.5
f
)
*
bin_size_w
/
static_cast
<
T
>
(
roi_bin_grid_w
);
// Rotate by theta around the center and translate
// In image space, (y, x) is the order for Right Handed System,
// and this is essentially multiplying the point by a rotation matrix
// to rotate it counterclockwise through angle theta.
T
y
=
yy
*
cos_theta
-
xx
*
sin_theta
+
roi_center_h
;
T
x
=
yy
*
sin_theta
+
xx
*
cos_theta
+
roi_center_w
;
// deal with: inverse elements are out of feature map boundary
if
(
y
<
-
1.0
||
y
>
height
||
x
<
-
1.0
||
x
>
width
)
{
// empty
PreCalc
<
T
>
pc
;
pc
.
pos1
=
0
;
pc
.
pos2
=
0
;
pc
.
pos3
=
0
;
pc
.
pos4
=
0
;
pc
.
w1
=
0
;
pc
.
w2
=
0
;
pc
.
w3
=
0
;
pc
.
w4
=
0
;
pre_calc
[
pre_calc_index
]
=
pc
;
pre_calc_index
+=
1
;
continue
;
}
if
(
y
<
0
)
{
y
=
0
;
}
if
(
x
<
0
)
{
x
=
0
;
}
int
y_low
=
(
int
)
y
;
int
x_low
=
(
int
)
x
;
int
y_high
;
int
x_high
;
if
(
y_low
>=
height
-
1
)
{
y_high
=
y_low
=
height
-
1
;
y
=
(
T
)
y_low
;
}
else
{
y_high
=
y_low
+
1
;
}
if
(
x_low
>=
width
-
1
)
{
x_high
=
x_low
=
width
-
1
;
x
=
(
T
)
x_low
;
}
else
{
x_high
=
x_low
+
1
;
}
T
ly
=
y
-
y_low
;
T
lx
=
x
-
x_low
;
T
hy
=
1.
-
ly
,
hx
=
1.
-
lx
;
T
w1
=
hy
*
hx
,
w2
=
hy
*
lx
,
w3
=
ly
*
hx
,
w4
=
ly
*
lx
;
// save weights and indices
PreCalc
<
T
>
pc
;
pc
.
pos1
=
y_low
*
width
+
x_low
;
pc
.
pos2
=
y_low
*
width
+
x_high
;
pc
.
pos3
=
y_high
*
width
+
x_low
;
pc
.
pos4
=
y_high
*
width
+
x_high
;
pc
.
w1
=
w1
;
pc
.
w2
=
w2
;
pc
.
w3
=
w3
;
pc
.
w4
=
w4
;
pre_calc
[
pre_calc_index
]
=
pc
;
pre_calc_index
+=
1
;
}
}
}
}
}
template
<
typename
T
>
void
ROIAlignRotatedForward
(
const
int
nthreads
,
const
T
*
input
,
const
T
&
spatial_scale
,
const
bool
aligned
,
const
bool
clockwise
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
sampling_ratio
,
const
T
*
rois
,
T
*
output
)
{
int
n_rois
=
nthreads
/
channels
/
pooled_width
/
pooled_height
;
// (n, c, ph, pw) is an element in the pooled output
// can be parallelized using omp
// #pragma omp parallel for num_threads(32)
for
(
int
n
=
0
;
n
<
n_rois
;
n
++
)
{
int
index_n
=
n
*
channels
*
pooled_width
*
pooled_height
;
const
T
*
current_roi
=
rois
+
n
*
6
;
int
roi_batch_ind
=
current_roi
[
0
];
// Do not use rounding; this implementation detail is critical
T
offset
=
aligned
?
(
T
)
0.5
:
(
T
)
0.0
;
T
roi_center_w
=
current_roi
[
1
]
*
spatial_scale
-
offset
;
T
roi_center_h
=
current_roi
[
2
]
*
spatial_scale
-
offset
;
T
roi_width
=
current_roi
[
3
]
*
spatial_scale
;
T
roi_height
=
current_roi
[
4
]
*
spatial_scale
;
T
theta
=
current_roi
[
5
];
if
(
clockwise
)
{
theta
=
-
theta
;
// If clockwise, the angle needs to be reversed.
}
T
cos_theta
=
cos
(
theta
);
T
sin_theta
=
sin
(
theta
);
if
(
aligned
)
{
AT_ASSERTM
(
roi_width
>=
0
&&
roi_height
>=
0
,
"ROIs in ROIAlignRotated do not have non-negative size!"
);
}
else
{
// for backward-compatibility only
roi_width
=
std
::
max
(
roi_width
,
(
T
)
1.
);
roi_height
=
std
::
max
(
roi_height
,
(
T
)
1.
);
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
// We use roi_bin_grid to sample the grid and mimic integral
int
roi_bin_grid_h
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_height
/
pooled_height
);
// e.g., = 2
int
roi_bin_grid_w
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_width
/
pooled_width
);
// We do average (integral) pooling inside a bin
const
T
count
=
std
::
max
(
roi_bin_grid_h
*
roi_bin_grid_w
,
1
);
// e.g. = 4
// we want to precalculate indices and weights shared by all channels,
// this is the key point of optimization
std
::
vector
<
PreCalc
<
T
>>
pre_calc
(
roi_bin_grid_h
*
roi_bin_grid_w
*
pooled_width
*
pooled_height
);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
T
roi_start_h
=
-
roi_height
/
2.0
;
T
roi_start_w
=
-
roi_width
/
2.0
;
pre_calc_for_bilinear_interpolate
(
height
,
width
,
pooled_height
,
pooled_width
,
roi_bin_grid_h
,
roi_bin_grid_w
,
roi_start_h
,
roi_start_w
,
bin_size_h
,
bin_size_w
,
roi_bin_grid_h
,
roi_bin_grid_w
,
roi_center_h
,
roi_center_w
,
cos_theta
,
sin_theta
,
pre_calc
);
for
(
int
c
=
0
;
c
<
channels
;
c
++
)
{
int
index_n_c
=
index_n
+
c
*
pooled_width
*
pooled_height
;
const
T
*
offset_input
=
input
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
int
pre_calc_index
=
0
;
for
(
int
ph
=
0
;
ph
<
pooled_height
;
ph
++
)
{
for
(
int
pw
=
0
;
pw
<
pooled_width
;
pw
++
)
{
int
index
=
index_n_c
+
ph
*
pooled_width
+
pw
;
T
output_val
=
0.
;
for
(
int
iy
=
0
;
iy
<
roi_bin_grid_h
;
iy
++
)
{
for
(
int
ix
=
0
;
ix
<
roi_bin_grid_w
;
ix
++
)
{
PreCalc
<
T
>
pc
=
pre_calc
[
pre_calc_index
];
output_val
+=
pc
.
w1
*
offset_input
[
pc
.
pos1
]
+
pc
.
w2
*
offset_input
[
pc
.
pos2
]
+
pc
.
w3
*
offset_input
[
pc
.
pos3
]
+
pc
.
w4
*
offset_input
[
pc
.
pos4
];
pre_calc_index
+=
1
;
}
}
output_val
/=
count
;
output
[
index
]
=
output_val
;
}
// for pw
}
// for ph
}
// for c
}
// for n
}
template
<
typename
T
>
void
bilinear_interpolate_gradient
(
const
int
height
,
const
int
width
,
T
y
,
T
x
,
T
&
w1
,
T
&
w2
,
T
&
w3
,
T
&
w4
,
int
&
x_low
,
int
&
x_high
,
int
&
y_low
,
int
&
y_high
)
{
// deal with cases that inverse elements are out of feature map boundary
if
(
y
<
-
1.0
||
y
>
height
||
x
<
-
1.0
||
x
>
width
)
{
// empty
w1
=
w2
=
w3
=
w4
=
0.
;
x_low
=
x_high
=
y_low
=
y_high
=
-
1
;
return
;
}
if
(
y
<
0
)
{
y
=
0
;
}
if
(
x
<
0
)
{
x
=
0
;
}
y_low
=
(
int
)
y
;
x_low
=
(
int
)
x
;
if
(
y_low
>=
height
-
1
)
{
y_high
=
y_low
=
height
-
1
;
y
=
(
T
)
y_low
;
}
else
{
y_high
=
y_low
+
1
;
}
if
(
x_low
>=
width
-
1
)
{
x_high
=
x_low
=
width
-
1
;
x
=
(
T
)
x_low
;
}
else
{
x_high
=
x_low
+
1
;
}
T
ly
=
y
-
y_low
;
T
lx
=
x
-
x_low
;
T
hy
=
1.
-
ly
,
hx
=
1.
-
lx
;
// reference in forward
// T v1 = input[y_low * width + x_low];
// T v2 = input[y_low * width + x_high];
// T v3 = input[y_high * width + x_low];
// T v4 = input[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
w1
=
hy
*
hx
,
w2
=
hy
*
lx
,
w3
=
ly
*
hx
,
w4
=
ly
*
lx
;
return
;
}
template
<
class
T
>
inline
void
add
(
T
*
address
,
const
T
&
val
)
{
*
address
+=
val
;
}
template
<
typename
T
>
void
ROIAlignRotatedBackward
(
const
int
nthreads
,
// may not be contiguous. should index using n_stride, etc
const
T
*
grad_output
,
const
T
&
spatial_scale
,
const
bool
aligned
,
const
bool
clockwise
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
sampling_ratio
,
T
*
grad_input
,
const
T
*
rois
,
const
int
n_stride
,
const
int
c_stride
,
const
int
h_stride
,
const
int
w_stride
)
{
for
(
int
index
=
0
;
index
<
nthreads
;
index
++
)
{
// (n, c, ph, pw) is an element in the pooled output
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
T
*
current_roi
=
rois
+
n
*
6
;
int
roi_batch_ind
=
current_roi
[
0
];
// Do not use rounding; this implementation detail is critical
T
offset
=
aligned
?
(
T
)
0.5
:
(
T
)
0.0
;
T
roi_center_w
=
current_roi
[
1
]
*
spatial_scale
-
offset
;
T
roi_center_h
=
current_roi
[
2
]
*
spatial_scale
-
offset
;
T
roi_width
=
current_roi
[
3
]
*
spatial_scale
;
T
roi_height
=
current_roi
[
4
]
*
spatial_scale
;
T
theta
=
current_roi
[
5
];
if
(
clockwise
)
{
theta
=
-
theta
;
// If clockwise, the angle needs to be reversed.
}
T
cos_theta
=
cos
(
theta
);
T
sin_theta
=
sin
(
theta
);
if
(
aligned
)
{
AT_ASSERTM
(
roi_width
>=
0
&&
roi_height
>=
0
,
"ROIs in ROIAlignRotated do not have non-negative size!"
);
}
else
{
// for backward-compatibility only
roi_width
=
std
::
max
(
roi_width
,
(
T
)
1.
);
roi_height
=
std
::
max
(
roi_height
,
(
T
)
1.
);
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
T
*
offset_grad_input
=
grad_input
+
((
roi_batch_ind
*
channels
+
c
)
*
height
*
width
);
int
output_offset
=
n
*
n_stride
+
c
*
c_stride
;
const
T
*
offset_grad_output
=
grad_output
+
output_offset
;
const
T
grad_output_this_bin
=
offset_grad_output
[
ph
*
h_stride
+
pw
*
w_stride
];
// We use roi_bin_grid to sample the grid and mimic integral
int
roi_bin_grid_h
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_height
/
pooled_height
);
// e.g., = 2
int
roi_bin_grid_w
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_width
/
pooled_width
);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
T
roi_start_h
=
-
roi_height
/
2.0
;
T
roi_start_w
=
-
roi_width
/
2.0
;
// We do average (integral) pooling inside a bin
const
T
count
=
roi_bin_grid_h
*
roi_bin_grid_w
;
// e.g. = 4
for
(
int
iy
=
0
;
iy
<
roi_bin_grid_h
;
iy
++
)
{
const
T
yy
=
roi_start_h
+
ph
*
bin_size_h
+
static_cast
<
T
>
(
iy
+
.5
f
)
*
bin_size_h
/
static_cast
<
T
>
(
roi_bin_grid_h
);
// e.g., 0.5, 1.5
for
(
int
ix
=
0
;
ix
<
roi_bin_grid_w
;
ix
++
)
{
const
T
xx
=
roi_start_w
+
pw
*
bin_size_w
+
static_cast
<
T
>
(
ix
+
.5
f
)
*
bin_size_w
/
static_cast
<
T
>
(
roi_bin_grid_w
);
// Rotate by theta around the center and translate
T
y
=
yy
*
cos_theta
-
xx
*
sin_theta
+
roi_center_h
;
T
x
=
yy
*
sin_theta
+
xx
*
cos_theta
+
roi_center_w
;
T
w1
,
w2
,
w3
,
w4
;
int
x_low
,
x_high
,
y_low
,
y_high
;
bilinear_interpolate_gradient
(
height
,
width
,
y
,
x
,
w1
,
w2
,
w3
,
w4
,
x_low
,
x_high
,
y_low
,
y_high
);
T
g1
=
grad_output_this_bin
*
w1
/
count
;
T
g2
=
grad_output_this_bin
*
w2
/
count
;
T
g3
=
grad_output_this_bin
*
w3
/
count
;
T
g4
=
grad_output_this_bin
*
w4
/
count
;
if
(
x_low
>=
0
&&
x_high
>=
0
&&
y_low
>=
0
&&
y_high
>=
0
)
{
// atomic add is not needed for now since it is single threaded
add
(
offset_grad_input
+
y_low
*
width
+
x_low
,
static_cast
<
T
>
(
g1
));
add
(
offset_grad_input
+
y_low
*
width
+
x_high
,
static_cast
<
T
>
(
g2
));
add
(
offset_grad_input
+
y_high
*
width
+
x_low
,
static_cast
<
T
>
(
g3
));
add
(
offset_grad_input
+
y_high
*
width
+
x_high
,
static_cast
<
T
>
(
g4
));
}
// if
}
// ix
}
// iy
}
// for
}
// ROIAlignRotatedBackward
void
ROIAlignRotatedForwardCPULauncher
(
Tensor
input
,
Tensor
rois
,
Tensor
output
,
int
aligned_height
,
int
aligned_width
,
float
spatial_scale
,
int
sampling_ratio
,
bool
aligned
,
bool
clockwise
)
{
int
output_size
=
output
.
numel
();
int
channels
=
input
.
size
(
1
);
int
height
=
input
.
size
(
2
);
int
width
=
input
.
size
(
3
);
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
input
.
scalar_type
(),
"ROIAlignRotated_forward"
,
[
&
]
{
ROIAlignRotatedForward
<
scalar_t
>
(
output_size
,
input
.
data_ptr
<
scalar_t
>
(),
static_cast
<
scalar_t
>
(
spatial_scale
),
aligned
,
clockwise
,
channels
,
height
,
width
,
aligned_height
,
aligned_width
,
sampling_ratio
,
rois
.
data_ptr
<
scalar_t
>
(),
output
.
data_ptr
<
scalar_t
>
());
});
}
void
ROIAlignRotatedBackwardCPULauncher
(
Tensor
grad_output
,
Tensor
rois
,
Tensor
grad_input
,
int
aligned_height
,
int
aligned_width
,
float
spatial_scale
,
int
sampling_ratio
,
bool
aligned
,
bool
clockwise
)
{
int
output_size
=
grad_output
.
numel
();
int
channels
=
grad_input
.
size
(
1
);
int
height
=
grad_input
.
size
(
2
);
int
width
=
grad_input
.
size
(
3
);
// get stride values to ensure indexing into gradients is correct.
int
n_stride
=
grad_output
.
stride
(
0
);
int
c_stride
=
grad_output
.
stride
(
1
);
int
h_stride
=
grad_output
.
stride
(
2
);
int
w_stride
=
grad_output
.
stride
(
3
);
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
grad_output
.
scalar_type
(),
"ROIAlignRotated_backward"
,
[
&
]
{
ROIAlignRotatedBackward
<
scalar_t
>
(
grad_output
.
numel
(),
grad_output
.
data_ptr
<
scalar_t
>
(),
static_cast
<
scalar_t
>
(
spatial_scale
),
aligned
,
clockwise
,
channels
,
height
,
width
,
aligned_height
,
aligned_width
,
sampling_ratio
,
grad_input
.
data_ptr
<
scalar_t
>
(),
rois
.
data_ptr
<
scalar_t
>
(),
n_stride
,
c_stride
,
h_stride
,
w_stride
);
});
}
mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp
@@ -14,14 +14,14 @@ void roi_align_rotated_forward_cuda_parrots(CudaContext& ctx,
   int pooled_height;
   int pooled_width;
   float spatial_scale;
-  int sample_num;
+  int sampling_ratio;
   bool aligned;
   bool clockwise;
   SSAttrs(attr)
       .get<int>("pooled_height", pooled_height)
       .get<int>("pooled_width", pooled_width)
       .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sample_num", sample_num)
+      .get<int>("sampling_ratio", sampling_ratio)
       .get<bool>("aligned", aligned)
       .get<bool>("clockwise", clockwise)
       .done();
@@ -30,7 +30,7 @@ void roi_align_rotated_forward_cuda_parrots(CudaContext& ctx,
   const auto& rois = buildATensor(ctx, ins[1]);
   auto output = buildATensor(ctx, outs[0]);
   roi_align_rotated_forward_cuda(input, rois, output, pooled_height,
-                                 pooled_width, spatial_scale, sample_num,
+                                 pooled_width, spatial_scale, sampling_ratio,
                                  aligned, clockwise);
 }
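The rename from sample_num to sampling_ratio is applied consistently to the local variable, the attribute key, and the call argument. For readability, this is how the forward Parrots entry point reads once the two hunks above are applied; the lines not shown in the diff (the standard Parrots signature and the input tensor construction) are assumptions based on the surrounding context.

// Hedged reconstruction of roi_align_rotated_forward_cuda_parrots after this
// commit; lines marked "assumed" are not visible in the hunks above.
void roi_align_rotated_forward_cuda_parrots(CudaContext& ctx,
                                            const SSElement& attr,          // assumed
                                            const OperatorBase::in_list_t& ins,   // assumed
                                            OperatorBase::out_list_t& outs) {     // assumed
  int pooled_height;
  int pooled_width;
  float spatial_scale;
  int sampling_ratio;
  bool aligned;
  bool clockwise;
  SSAttrs(attr)
      .get<int>("pooled_height", pooled_height)
      .get<int>("pooled_width", pooled_width)
      .get<float>("spatial_scale", spatial_scale)
      .get<int>("sampling_ratio", sampling_ratio)
      .get<bool>("aligned", aligned)
      .get<bool>("clockwise", clockwise)
      .done();

  const auto& input = buildATensor(ctx, ins[0]);  // assumed: folded out of the hunk
  const auto& rois = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  roi_align_rotated_forward_cuda(input, rois, output, pooled_height,
                                 pooled_width, spatial_scale, sampling_ratio,
                                 aligned, clockwise);
}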
@@ -41,14 +41,14 @@ void roi_align_rotated_backward_cuda_parrots(CudaContext& ctx,
   int pooled_height;
   int pooled_width;
   float spatial_scale;
-  int sample_num;
+  int sampling_ratio;
   bool aligned;
   bool clockwise;
   SSAttrs(attr)
       .get<int>("pooled_height", pooled_height)
       .get<int>("pooled_width", pooled_width)
       .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sample_num", sample_num)
+      .get<int>("sampling_ratio", sampling_ratio)
       .get<bool>("aligned", aligned)
       .get<bool>("clockwise", clockwise)
       .done();
@@ -57,7 +57,7 @@ void roi_align_rotated_backward_cuda_parrots(CudaContext& ctx,
   const auto& rois = buildATensor(ctx, ins[1]);
   auto grad_input = buildATensor(ctx, outs[0]);
   roi_align_rotated_backward_cuda(grad_output, rois, grad_input, pooled_height,
-                                  pooled_width, spatial_scale, sample_num,
+                                  pooled_width, spatial_scale, sampling_ratio,
                                   aligned, clockwise);
 }
 #endif
@@ -69,14 +69,14 @@ void roi_align_rotated_forward_cpu_parrots(HostContext& ctx,
   int pooled_height;
   int pooled_width;
   float spatial_scale;
-  int sample_num;
+  int sampling_ratio;
   bool aligned;
   bool clockwise;
   SSAttrs(attr)
       .get<int>("pooled_height", pooled_height)
       .get<int>("pooled_width", pooled_width)
       .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sample_num", sample_num)
+      .get<int>("sampling_ratio", sampling_ratio)
       .get<bool>("aligned", aligned)
       .get<bool>("clockwise", clockwise)
       .done();
@@ -85,7 +85,7 @@ void roi_align_rotated_forward_cpu_parrots(HostContext& ctx,
   const auto& rois = buildATensor(ctx, ins[1]);
   auto output = buildATensor(ctx, outs[0]);
   roi_align_rotated_forward_cpu(input, rois, output, pooled_height,
-                                pooled_width, spatial_scale, sample_num,
+                                pooled_width, spatial_scale, sampling_ratio,
                                 aligned, clockwise);
 }
@@ -96,14 +96,14 @@ void roi_align_rotated_backward_cpu_parrots(HostContext& ctx,
   int pooled_height;
   int pooled_width;
   float spatial_scale;
-  int sample_num;
+  int sampling_ratio;
   bool aligned;
   bool clockwise;
   SSAttrs(attr)
       .get<int>("pooled_height", pooled_height)
       .get<int>("pooled_width", pooled_width)
       .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sample_num", sample_num)
+      .get<int>("sampling_ratio", sampling_ratio)
       .get<bool>("aligned", aligned)
       .get<bool>("clockwise", clockwise)
       .done();
@@ -112,7 +112,7 @@ void roi_align_rotated_backward_cpu_parrots(HostContext& ctx,
   const auto& rois = buildATensor(ctx, ins[1]);
   auto grad_input = buildATensor(ctx, outs[0]);
   roi_align_rotated_backward_cpu(grad_output, rois, grad_input, pooled_height,
-                                 pooled_width, spatial_scale, sample_num,
+                                 pooled_width, spatial_scale, sampling_ratio,
                                  aligned, clockwise);
 }
@@ -120,7 +120,7 @@ PARROTS_EXTENSION_REGISTER(roi_align_rotated_forward)
     .attr("pooled_height")
     .attr("pooled_width")
     .attr("spatial_scale")
-    .attr("sample_num")
+    .attr("sampling_ratio")
     .attr("aligned")
     .attr("clockwise")
     .input(2)
@@ -135,7 +135,7 @@ PARROTS_EXTENSION_REGISTER(roi_align_rotated_backward)
     .attr("pooled_height")
     .attr("pooled_width")
     .attr("spatial_scale")
-    .attr("sample_num")
+    .attr("sampling_ratio")
     .attr("aligned")
     .attr("clockwise")
     .input(2)