OpenDAS / MMCV / Commits

Commit fdeee889, authored May 25, 2025 by limm
release v1.6.1 of mmcv
Parent: df465820

Changes: 457. Showing 20 changed files with 514 additions and 1035 deletions (+514 -1035).
Changed files shown on this page (additions / deletions):

  mmcv/ops/csrc/parrots/deform_conv.cpp                 +50   -91
  mmcv/ops/csrc/parrots/deform_conv_cpu.cpp             +0    -377
  mmcv/ops/csrc/parrots/deform_roi_pool.cpp             +15   -55
  mmcv/ops/csrc/parrots/diff_iou_rotated.cpp            +14   -0
  mmcv/ops/csrc/parrots/diff_iou_rotated_parrots.cpp    +28   -0
  mmcv/ops/csrc/parrots/diff_iou_rotated_pytorch.h      +10   -0
  mmcv/ops/csrc/parrots/focal_loss.cpp                  +19   -97
  mmcv/ops/csrc/parrots/furthest_point_sample.cpp       +16   -44
  mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp        +112  -20
  mmcv/ops/csrc/parrots/gather_points.cpp               +13   -38
  mmcv/ops/csrc/parrots/group_points.cpp                +13   -37
  mmcv/ops/csrc/parrots/info.cpp                        +56   -0
  mmcv/ops/csrc/parrots/iou3d.cpp                       +82   -179
  mmcv/ops/csrc/parrots/iou3d_parrots.cpp               +17   -17
  mmcv/ops/csrc/parrots/iou3d_pytorch.h                 +6    -6
  mmcv/ops/csrc/parrots/knn.cpp                         +6    -21
  mmcv/ops/csrc/parrots/masked_conv2d.cpp               +11   -53
  mmcv/ops/csrc/parrots/min_area_polygons.cpp           +11   -0
  mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp   +26   -0
  mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h     +9    -0

Too many changes to show: to preserve performance only 457 of 457+ files are displayed.
mmcv/ops/csrc/parrots/deform_conv.cpp  (view file @ fdeee889)

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h,
                            const int pad_w, const int stride_h,
                            const int stride_w, const int dilation_h,
                            const int dilation_w, const int parallel_imgs,
                            const int deformable_group, Tensor data_col) {
  DISPATCH_DEVICE_IMPL(deformable_im2col_impl, data_im, data_offset, channels,
                       height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
                       stride_w, dilation_h, dilation_w, parallel_imgs,
                       deformable_group, data_col);
}

void deformable_col2im_impl(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h,
                            const int pad_w, const int stride_h,
                            const int stride_w, const int dilation_h,
                            const int dilation_w, const int parallel_imgs,
                            const int deformable_group, Tensor grad_im) {
  DISPATCH_DEVICE_IMPL(deformable_col2im_impl, data_col, data_offset, channels,
                       height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
                       stride_w, dilation_h, dilation_w, parallel_imgs,
                       deformable_group, grad_im);
}

void deformable_col2im_coord_impl(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset) {
  DISPATCH_DEVICE_IMPL(deformable_col2im_coord_impl, data_col, data_im,
                       data_offset, channels, height, width, ksize_h, ksize_w,
                       pad_h, pad_w, stride_h, stride_w, dilation_h,
                       dilation_w, parallel_imgs, deformable_group,
                       grad_offset);
}

(Removed here: the previous #ifdef MMCV_WITH_CUDA declarations of
deformable_im2col, deformable_col2im and deformable_col2im_coord, and the
matching deformable_im2col_cpu, deformable_col2im_cpu and
deformable_col2im_coord_cpu declarations.)

void deform_conv_shape_check(at::Tensor input, at::Tensor offset,
                             at::Tensor *gradOutput, at::Tensor weight, int kH,
...
@@ -227,17 +216,9 @@ void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
                        output_buffer.size(2), output_buffer.size(3)});

  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);

    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    weight = weight.view({group, weight.size(0) / group, weight.size(1),

(The removed lines branched on input.device().is_cuda() and called either
deformable_im2col under #ifdef MMCV_WITH_CUDA or deformable_im2col_cpu.)
...
@@ -373,29 +354,15 @@ void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
      {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2),
       gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)});

  deformable_col2im_coord_impl(columns, input[elt], offset[elt], nInputPlane,
                               inputHeight, inputWidth, kH, kW, padH, padW, dH,
                               dW, dilationH, dilationW, im2col_step,
                               deformable_group, gradOffset[elt]);

  deformable_col2im_impl(columns, offset[elt], nInputPlane, inputHeight,
                         inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                         dilationW, im2col_step, deformable_group,
                         gradInput[elt]);

  weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
                        weight.size(3), weight.size(4)});

(The removed lines made the same two calls through the is_cuda() branches,
using deformable_col2im_coord / deformable_col2im on CUDA and
deformable_col2im_coord_cpu / deformable_col2im_cpu on CPU.)
...
@@ -508,17 +475,9 @@ void deform_conv_backward_parameters(Tensor input, Tensor offset,
                             deformable_group * 2 * kH * kW, outputHeight,
                             outputWidth});

  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);

    // divide into group
    gradOutputBuffer = gradOutputBuffer.view(

(The removed lines again branched between deformable_im2col and
deformable_im2col_cpu.)
...
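The pattern above replaces per-call-site #ifdef MMCV_WITH_CUDA / is_cuda() branching with a lookup through DISPATCH_DEVICE_IMPL. As a rough illustration of that idea only, here is a minimal hand-rolled dispatch registry in plain C++; it is a sketch, not mmcv's actual pytorch_device_registry.hpp machinery, and all names in it (register_im2col, deformable_im2col_dispatch) are made up for the example.

// A minimal, self-contained sketch of a device dispatch registry
// (NOT mmcv's DISPATCH_DEVICE_IMPL implementation, which keys off Tensor devices).
#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>

// One registry per operator: maps a device tag ("cpu", "cuda", ...) to the
// function that implements the operator on that device.
using Im2colFn = std::function<void(const std::string& msg)>;

std::map<std::string, Im2colFn>& im2col_registry() {
  static std::map<std::string, Im2colFn> registry;
  return registry;
}

void register_im2col(const std::string& device, Im2colFn fn) {
  im2col_registry()[device] = std::move(fn);
}

// Device-agnostic entry point: look up the implementation for the device of
// the incoming data instead of hard-coding if (is_cuda()) branches everywhere.
void deformable_im2col_dispatch(const std::string& device,
                                const std::string& msg) {
  auto it = im2col_registry().find(device);
  if (it == im2col_registry().end())
    throw std::runtime_error("no deformable_im2col impl for " + device);
  it->second(msg);
}

int main() {
  register_im2col("cpu", [](const std::string& msg) {
    std::cout << "cpu impl: " << msg << "\n";
  });
  // A CUDA build would additionally register a "cuda" implementation.
  deformable_im2col_dispatch("cpu", "hello");
  return 0;
}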
mmcv/ops/csrc/parrots/deform_conv_cpu.cpp  (deleted, 100644 → 0; view file @ df465820)

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"

template <typename T>
T deformable_im2col_bilinear_cpu(const T* input, const int data_width,
                                 const int height, const int width, T h, T w) {
  if (h <= -1 || height <= h || w <= -1 || width <= w) {
    return 0;
  }

  int h_low = floor(h);
  int w_low = floor(w);
  int h_high = h_low + 1;
  int w_high = w_low + 1;

  T lh = h - h_low;
  T lw = w - w_low;
  T hh = 1 - lh, hw = 1 - lw;

  T v1 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
  T v2 = 0;
  if (h_low >= 0 && w_high <= width - 1) v2 = input[h_low * data_width + w_high];
  T v3 = 0;
  if (h_high <= height - 1 && w_low >= 0) v3 = input[h_high * data_width + w_low];
  T v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1)
    v4 = input[h_high * data_width + w_high];

  T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

template <typename T>
T get_gradient_weight_cpu(T argmax_h, T argmax_w, const int h, const int w,
                          const int height, const int width) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floor(argmax_h);
  int argmax_w_low = floor(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;
  if (h == argmax_h_low && w == argmax_w_low)
    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  if (h == argmax_h_low && w == argmax_w_high)
    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  if (h == argmax_h_high && w == argmax_w_low)
    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  if (h == argmax_h_high && w == argmax_w_high)
    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  return weight;
}

template <typename T>
T get_coordinate_weight_cpu(T argmax_h, T argmax_w, const int height,
                            const int width, const T* im_data,
                            const int data_width, const int bp_dir) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floor(argmax_h);
  int argmax_w_low = floor(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;

  if (bp_dir == 0) {
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += -1 * (argmax_w - argmax_w_low) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_w - argmax_w_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  } else if (bp_dir == 1) {
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += -1 * (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  }

  return weight;
}

template <typename T>
void deformable_im2col_cpu_kernel(
    const int n, const T* data_im, const T* data_offset, const int height,
    const int width, const int kernel_h, const int kernel_w, const int pad_h,
    const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T* data_col) {
  for (int index = 0; index < n; index++) {
    // index index of output matrix
    const int w_col = index % width_col;
    const int h_col = (index / width_col) % height_col;
    const int b_col = (index / width_col / height_col) % batch_size;
    const int c_im = (index / width_col / height_col) / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    // compute deformable group index
    const int deformable_group_index = c_im / channel_per_deformable_group;

    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;
    T* data_col_ptr =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T* data_im_ptr =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T* data_offset_ptr =
        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    for (int i = 0; i < kernel_h; ++i) {
      for (int j = 0; j < kernel_w; ++j) {
        const int data_offset_h_ptr =
            ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
        const int data_offset_w_ptr =
            ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
            w_col;
        const T offset_h = data_offset_ptr[data_offset_h_ptr];
        const T offset_w = data_offset_ptr[data_offset_w_ptr];
        T val = static_cast<T>(0);
        const T h_im = h_in + i * dilation_h + offset_h;
        const T w_im = w_in + j * dilation_w + offset_w;
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
          val = deformable_im2col_bilinear_cpu(data_im_ptr, width, height,
                                               width, h_im, w_im);
        *data_col_ptr = val;
        data_col_ptr += batch_size * height_col * width_col;
      }
    }
  }
}

template <typename T>
void deformable_col2im_cpu_kernel(
    const int n, const T* data_col, const T* data_offset, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T* grad_im) {
  for (int index = 0; index < n; index++) {
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;
    // compute the start and end of the output

    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[index];
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight =
              get_gradient_weight_cpu(cur_inv_h_data, cur_inv_w_data,
                                      cur_h + dy, cur_w + dx, height, width);
          *(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad;
        }
      }
    }
  }
}

template <typename T>
void deformable_col2im_coord_cpu_kernel(
    const int n, const T* data_col, const T* data_im, const T* data_offset,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int offset_channels, const int deformable_group,
    const int height_col, const int width_col, T* grad_offset) {
  for (int index = 0; index < n; index++) {
    T val = 0;
    int w = index % width_col;
    int h = (index / width_col) % height_col;
    int c = (index / width_col / height_col) % offset_channels;
    int b = (index / width_col / height_col) / offset_channels;
    // compute the start and end of the output

    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    int cnt = 0;
    const T* data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T* data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      const int bp_dir = offset_c % 2;

      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
        inv_h = inv_w = -2;
      const T weight = get_coordinate_weight_cpu(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos];
      cnt += 1;
    }

    grad_offset[index] = val;
  }
}

void deformable_im2col_cpu(Tensor data_im, Tensor data_offset,
                           const int channels, const int height,
                           const int width, const int ksize_h,
                           const int ksize_w, const int pad_h, const int pad_w,
                           const int stride_h, const int stride_w,
                           const int dilation_h, const int dilation_w,
                           const int parallel_imgs, const int deformable_group,
                           Tensor data_col) {
  int height_col =
      (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
  int width_col =
      (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
  int num_kernels = channels * height_col * width_col * parallel_imgs;
  int channel_per_deformable_group = channels / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_im.scalar_type(), "deformable_im2col_cpu", [&] {
        deformable_im2col_cpu_kernel<scalar_t>(
            num_kernels, data_im.data_ptr<scalar_t>(),
            data_offset.data_ptr<scalar_t>(), height, width, ksize_h, ksize_w,
            pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
            channel_per_deformable_group, parallel_imgs, channels,
            deformable_group, height_col, width_col,
            data_col.data_ptr<scalar_t>());
      });
}

void deformable_col2im_cpu(Tensor data_col, Tensor data_offset,
                           const int channels, const int height,
                           const int width, const int ksize_h,
                           const int ksize_w, const int pad_h, const int pad_w,
                           const int stride_h, const int stride_w,
                           const int dilation_h, const int dilation_w,
                           const int parallel_imgs, const int deformable_group,
                           Tensor grad_im) {
  // todo: make sure parallel_imgs is passed in correctly
  int height_col =
      (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
  int width_col =
      (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
  int num_kernels =
      channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs;
  int channel_per_deformable_group = channels / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
        const scalar_t* data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t* data_offset_ = data_offset.data_ptr<scalar_t>();
        scalar_t* grad_im_ = grad_im.data_ptr<scalar_t>();

        deformable_col2im_cpu_kernel<scalar_t>(
            num_kernels, data_col_, data_offset_, channels, height, width,
            ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
            dilation_w, channel_per_deformable_group, parallel_imgs,
            deformable_group, height_col, width_col, grad_im_);
      }));
}

void deformable_col2im_coord_cpu(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset) {
  int height_col =
      (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
  int width_col =
      (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
  int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w *
                    deformable_group * parallel_imgs;
  int channel_per_deformable_group =
      channels * ksize_h * ksize_w / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "deformable_col2im_coord_cpu", ([&] {
        const scalar_t* data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t* data_im_ = data_im.data_ptr<scalar_t>();
        const scalar_t* data_offset_ = data_offset.data_ptr<scalar_t>();
        scalar_t* grad_offset_ = grad_offset.data_ptr<scalar_t>();

        deformable_col2im_coord_cpu_kernel<scalar_t>(
            num_kernels, data_col_, data_im_, data_offset_, channels, height,
            width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w,
            dilation_h, dilation_w, channel_per_deformable_group,
            parallel_imgs, 2 * ksize_h * ksize_w * deformable_group,
            deformable_group, height_col, width_col, grad_offset_);
      }));
}
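The bilinear sampler above is the core of the deleted CPU path. Below is a standalone copy with a tiny driver, assuming only the standard library, which can be used to sanity-check the boundary handling (out-of-range coordinates sample as zero).

// Standalone copy of the bilinear sampling helper from the deleted file,
// with a small driver; coordinates outside (-1, height) x (-1, width) give 0.
#include <cmath>
#include <cstdio>

template <typename T>
T deformable_im2col_bilinear_cpu(const T* input, const int data_width,
                                 const int height, const int width, T h, T w) {
  if (h <= -1 || height <= h || w <= -1 || width <= w) return 0;
  int h_low = std::floor(h), w_low = std::floor(w);
  int h_high = h_low + 1, w_high = w_low + 1;
  T lh = h - h_low, lw = w - w_low;
  T hh = 1 - lh, hw = 1 - lw;
  T v1 = 0, v2 = 0, v3 = 0, v4 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
  if (h_low >= 0 && w_high <= width - 1) v2 = input[h_low * data_width + w_high];
  if (h_high <= height - 1 && w_low >= 0) v3 = input[h_high * data_width + w_low];
  if (h_high <= height - 1 && w_high <= width - 1)
    v4 = input[h_high * data_width + w_high];
  return hh * hw * v1 + hh * lw * v2 + lh * hw * v3 + lh * lw * v4;
}

int main() {
  const float img[4] = {0.f, 1.f, 2.f, 3.f};  // 2x2 image, row-major
  // Sampling at the centre (0.5, 0.5) averages the four pixels -> 1.5.
  std::printf("%f\n", deformable_im2col_bilinear_cpu(img, 2, 2, 2, 0.5f, 0.5f));
  // Sampling outside the valid range returns 0.
  std::printf("%f\n", deformable_im2col_bilinear_cpu(img, 2, 2, 2, -1.5f, 0.f));
  return 0;
}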
mmcv/ops/csrc/parrots/deform_roi_pool.cpp  (view file @ fdeee889)

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, input, rois, offset,
                       output, pooled_height, pooled_width, spatial_scale,
                       sampling_ratio, gamma);
}

void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_backward_impl, grad_output, input, rois,
                       offset, grad_input, grad_offset, pooled_height,
                       pooled_width, spatial_scale, sampling_ratio, gamma);
}

void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset,
                             Tensor output, int pooled_height,
                             int pooled_width, float spatial_scale,
                             int sampling_ratio, float gamma) {
  deform_roi_pool_forward_impl(input, rois, offset, output, pooled_height,
                               pooled_width, spatial_scale, sampling_ratio,
                               gamma);
}

void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
...
@@ -61,22 +36,7 @@ void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
                              Tensor grad_offset, int pooled_height,
                              int pooled_width, float spatial_scale,
                              int sampling_ratio, float gamma) {
  deform_roi_pool_backward_impl(grad_output, input, rois, offset, grad_input,
                                grad_offset, pooled_height, pooled_width,
                                spatial_scale, sampling_ratio, gamma);
}

(Removed here: the #ifdef MMCV_WITH_CUDA declarations of
DeformRoIPoolForwardCUDAKernelLauncher and
DeformRoIPoolBackwardCUDAKernelLauncher, the deform_roi_pool_forward_cuda /
deform_roi_pool_backward_cuda wrappers that called them, and the
CHECK_CUDA_INPUT checks plus AT_ERROR("DeformRoIPool is not compiled with GPU
support") / AT_ERROR("DeformRoIPool is not implemented on CPU") branches in
the two public entry points.)
mmcv/ops/csrc/parrots/diff_iou_rotated.cpp  (new file, 0 → 100644; view file @ fdeee889)

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

Tensor diff_iou_rotated_sort_vertices_forward_impl(Tensor vertices, Tensor mask,
                                                   Tensor num_valid) {
  return DISPATCH_DEVICE_IMPL(diff_iou_rotated_sort_vertices_forward_impl,
                              vertices, mask, num_valid);
}

Tensor diff_iou_rotated_sort_vertices_forward(Tensor vertices, Tensor mask,
                                              Tensor num_valid) {
  return diff_iou_rotated_sort_vertices_forward_impl(vertices, mask, num_valid);
}
mmcv/ops/csrc/parrots/diff_iou_rotated_parrots.cpp  (new file, 0 → 100644; view file @ fdeee889)

// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "diff_iou_rotated_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
void diff_iou_rotated_sort_vertices_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  at::Tensor boxes, scores, dets;
  auto vertices = buildATensor(ctx, ins[0]);
  auto mask = buildATensor(ctx, ins[1]);
  auto num_valid = buildATensor(ctx, ins[2]);
  auto out =
      diff_iou_rotated_sort_vertices_forward_cuda(vertices, mask, num_valid);
  updateDArray(ctx, out, outs[0]);
}

PARROTS_EXTENSION_REGISTER(diff_iou_rotated_sort_vertices_forward)
    .input(3)
    .output(1)
    .apply(diff_iou_rotated_sort_vertices_forward_cuda_parrots)
    .done();
#endif
mmcv/ops/csrc/parrots/diff_iou_rotated_pytorch.h  (new file, 0 → 100644; view file @ fdeee889)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef DIFF_IOU_ROTATED_PYTORCH_H
#define DIFF_IOU_ROTATED_PYTORCH_H
#include <torch/extension.h>
using namespace at;

Tensor diff_iou_rotated_sort_vertices_forward_cuda(Tensor vertices, Tensor mask,
                                                   Tensor num_valid);

#endif  // DIFF_IOU_ROTATED_PYTORCH_H
mmcv/ops/csrc/parrots/focal_loss.cpp  (view file @ fdeee889)

// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, input, target, weight,
                       grad_input, gamma, alpha);
}

void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_backward_impl, input, target, weight,
                       buff, grad_input, gamma, alpha);
}

void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor grad_input, float gamma, float alpha) {
  sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma,
                                   alpha);
}

void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  softmax_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor buff, Tensor grad_input, float gamma,
                                 float alpha) {
  softmax_focal_loss_backward_impl(input, target, weight, buff, grad_input,
                                   gamma, alpha);
}

(Removed here: the #ifdef MMCV_WITH_CUDA declarations of
SigmoidFocalLossForwardCUDAKernelLauncher, SigmoidFocalLossBackwardCUDAKernelLauncher,
SoftmaxFocalLossForwardCUDAKernelLauncher and SoftmaxFocalLossBackwardCUDAKernelLauncher,
the four *_cuda wrappers that called them, and the CHECK_CUDA_INPUT checks plus
AT_ERROR("SigmoidFocalLoss/SoftmaxFocalLoss is not compiled with GPU support") /
AT_ERROR("... is not implemented on CPU") branches in the public entry points.)
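For context, the quantity these entry points ultimately compute is the focal loss. The scalar CPU sketch below assumes the standard Lin et al. sigmoid formulation and omits the optional per-class weight tensor handled by the real kernels, so treat it as illustrative rather than as mmcv's implementation.

// Scalar sketch of the (assumed standard) sigmoid focal loss:
// positives:  -alpha       * (1 - p)^gamma * log(p)
// negatives:  -(1 - alpha) * p^gamma       * log(1 - p),  p = sigmoid(logit).
#include <cmath>
#include <cstdio>

float sigmoid_focal_loss(float logit, int target, float gamma, float alpha) {
  float p = 1.0f / (1.0f + std::exp(-logit));  // predicted probability
  if (target == 1)                             // positive example
    return -alpha * std::pow(1.0f - p, gamma) * std::log(p);
  return -(1.0f - alpha) * std::pow(p, gamma) * std::log(1.0f - p);
}

int main() {
  // A confident correct positive is strongly down-weighted by (1 - p)^gamma;
  // a confident wrong one is not.
  std::printf("easy positive: %f\n", sigmoid_focal_loss(3.0f, 1, 2.0f, 0.25f));
  std::printf("hard positive: %f\n", sigmoid_focal_loss(-3.0f, 1, 2.0f, 0.25f));
  return 0;
}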
mmcv/ops/csrc/parrots/furthest_point_sample.cpp  (view file @ fdeee889)

@@ -2,61 +2,33 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void furthest_point_sampling_forward_impl(Tensor points_tensor,
                                          Tensor temp_tensor, Tensor idx_tensor,
                                          int b, int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_forward_impl, points_tensor,
                       temp_tensor, idx_tensor, b, n, m);
}

void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl,
                       points_tensor, temp_tensor, idx_tensor, b, n, m);
}

void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m) {
  furthest_point_sampling_forward_impl(points_tensor, temp_tensor, idx_tensor,
                                       b, n, m);
}

void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m) {
  furthest_point_sampling_with_dist_forward_impl(points_tensor, temp_tensor,
                                                 idx_tensor, b, n, m);
}

(Removed here: the declarations of FurthestPointSamplingForwardCUDAKernelLauncher
and FurthestPointSamplingWithDistForwardCUDAKernelLauncher, taking
(int b, int n, int m, const float *dataset, float *temp, int *idxs), the
furthest_point_sampling_forward_cuda / furthest_point_sampling_with_dist_forward_cuda
wrappers, and the is_cuda() branches that extracted raw pointers via
data_ptr<float>() / data<float>() and otherwise raised
AT_ERROR("furthest_point_sampling[_with_dist] is not compiled with GPU support" /
"... is not implemented on CPU").)
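The operator being dispatched here is furthest point sampling: greedily pick m points, each time taking the point whose distance to the already-selected set is largest. A plain CPU sketch of that objective on a toy 2-D point set follows; it is illustrative only and does not mirror the (b, n) tensor layout or the temp buffer used by the real kernel.

// Plain CPU sketch of furthest point sampling for a single 2-D point set.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Pt { float x, y; };

static float dist2(const Pt& a, const Pt& b) {
  float dx = a.x - b.x, dy = a.y - b.y;
  return dx * dx + dy * dy;
}

// Returns the indices of m points, greedily maximising the distance of each
// new point to the set chosen so far (the same objective as the CUDA kernel).
std::vector<int> furthest_point_sample(const std::vector<Pt>& pts, int m) {
  std::vector<int> idx;
  std::vector<float> min_d(pts.size(), 1e10f);  // distance to selected set
  int cur = 0;                                  // start from point 0
  for (int i = 0; i < m; ++i) {
    idx.push_back(cur);
    int best = 0;
    float best_d = -1.0f;
    for (int j = 0; j < (int)pts.size(); ++j) {
      min_d[j] = std::min(min_d[j], dist2(pts[j], pts[cur]));
      if (min_d[j] > best_d) { best_d = min_d[j]; best = j; }
    }
    cur = best;
  }
  return idx;
}

int main() {
  std::vector<Pt> pts = {{0, 0}, {0.1f, 0}, {5, 5}, {10, 0}};
  for (int i : furthest_point_sample(pts, 3)) std::printf("%d ", i);
  std::printf("\n");  // prints "0 3 2": the near-duplicate point 1 is skipped
  return 0;
}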
mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp  (view file @ fdeee889)

// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act.cpp

/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================

1. Definitions

"Licensor" means any person or entity that distributes its Work.

"Software" means the original work of authorship made available under
this License.

"Work" means the Software and any additions to or derivative works of
the Software that are made available under this License.

The terms "reproduce," "reproduction," "derivative works," and
"distribution" have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.

Works, including the Software, are "made available" under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.

2. License Grants

    2.1 Copyright Grant. Subject to the terms and conditions of this
    License, each Licensor grants to you a perpetual, worldwide,
    non-exclusive, royalty-free, copyright license to reproduce,
    prepare derivative works of, publicly display, publicly perform,
    sublicense and distribute its Work and any resulting derivative
    works in any form.

3. Limitations

    3.1 Redistribution. You may reproduce or distribute the Work only
    if (a) you do so under this License, (b) you include a complete
    copy of this License with your distribution, and (c) you retain
    without modification any copyright, patent, trademark, or
    attribution notices that are present in the Work.

    3.2 Derivative Works. You may specify that additional or different
    terms apply to the use, reproduction, and distribution of your
    derivative works of the Work ("Your Terms") only if (a) Your Terms
    provide that the use limitation in Section 3.3 applies to your
    derivative works, and (b) you identify the specific derivative
    works that are subject to Your Terms. Notwithstanding Your Terms,
    this License (including the redistribution requirements in Section
    3.1) will continue to apply to the Work itself.

    3.3 Use Limitation. The Work and any derivative works thereof only
    may be used or intended for use non-commercially. Notwithstanding
    the foregoing, NVIDIA and its affiliates may use the Work and any
    derivative works commercially. As used herein, "non-commercially"
    means for research or evaluation purposes only.

    3.4 Patent Claims. If you bring or threaten to bring a patent claim
    against any Licensor (including any claim, cross-claim or
    counterclaim in a lawsuit) to enforce any patents that you allege
    are infringed by any Work, then your rights under this License from
    such Licensor (including the grant in Section 2.1) will terminate
    immediately.

    3.5 Trademarks. This License does not grant any rights to use any
    Licensor's or its affiliates' names, logos, or trademarks, except
    as necessary to reproduce the notices described in this License.

    3.6 Termination. If you violate any term of this License, then your
    rights under this License (including the grant in Section 2.1) will
    terminate immediately.

4. Disclaimer of Warranty.

THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.

5. Limitation of Liability.

EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.

=======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input,
                                           const torch::Tensor& bias,
                                           const torch::Tensor& refer, int act,
                                           int grad, float alpha, float scale) {
  return DISPATCH_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, input, bias, refer,
                              act, grad, alpha, scale);
}

torch::Tensor fused_bias_leakyrelu(const torch::Tensor& input,
                                   const torch::Tensor& bias,
                                   const torch::Tensor& refer, int act,
                                   int grad, float alpha, float scale) {
  return fused_bias_leakyrelu_op_impl(input, bias, refer, act, grad, alpha,
                                      scale);
}

(Removed here: the old OpenMMLab copyright header comment, the
#ifdef MMCV_WITH_CUDA-only declaration of fused_bias_leakyrelu_op, and the
CHECK_CUDA(input); CHECK_CUDA(bias); body of fused_bias_leakyrelu with its
AT_ERROR("Fused bias leakyrelu is not compiled with GPU support") fallback.)
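Element-wise, this fused op is usually defined as a leaky ReLU applied to input plus bias, rescaled by a constant, following the upstream StyleGAN2 fused_bias_act op this file is modified from. The scalar sketch below assumes that semantics and the upstream defaults (alpha = 0.2, scale = sqrt(2)); it is not mmcv's CUDA kernel.

// Scalar sketch (assumed semantics): out = leaky_relu(x + bias) * scale.
#include <cstdio>

float fused_bias_leakyrelu_scalar(float x, float bias, float alpha,
                                  float scale) {
  float v = x + bias;                       // add the per-channel bias
  return (v >= 0.0f ? v : alpha * v) * scale;  // leaky ReLU, then rescale
}

int main() {
  // alpha = 0.2 and scale = 2^0.5 are the usual StyleGAN2-style defaults.
  std::printf("%f\n", fused_bias_leakyrelu_scalar(1.0f, 0.5f, 0.2f, 1.41421f));
  std::printf("%f\n", fused_bias_leakyrelu_scalar(-1.0f, 0.5f, 0.2f, 1.41421f));
  return 0;
}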
mmcv/ops/csrc/parrots/gather_points.cpp  (view file @ fdeee889)

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void gather_points_forward_impl(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out) {
  DISPATCH_DEVICE_IMPL(gather_points_forward_impl, b, c, n, npoints, points,
                       idx, out);
}

void gather_points_backward_impl(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(gather_points_backward_impl, b, c, n, npoints, grad_out,
                       idx, grad_points);
}

void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n,
                           int npoints) {
  gather_points_forward_impl(b, c, n, npoints, points_tensor, idx_tensor,
                             out_tensor);
}

void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints) {
  gather_points_backward_impl(b, c, n, npoints, grad_out_tensor, idx_tensor,
                              grad_points_tensor);
}

(Removed here: the #ifdef MMCV_WITH_CUDA declarations of
GatherPointsForwardCUDAKernelLauncher and GatherPointsBackwardCUDAKernelLauncher,
the gather_points_forward_cuda / gather_points_backward_cuda wrappers, and the
is_cuda() branches with AT_ERROR("gather_points is not compiled with GPU
support") / AT_ERROR("gather_points is not implemented on CPU").)
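gather_points is, in effect, an indexed copy along the point dimension: out[b][c][i] = points[b][c][idx[b][i]] for (b, c, n) points and (b, npoints) indices. The CPU sketch below assumes that layout (the kernel itself is not part of this diff), so read it as an illustration of the op's semantics rather than mmcv's implementation.

// CPU sketch of gather_points: out[bi][ci][i] = points[bi][ci][idx[bi][i]],
// with points laid out as (b, c, n) and idx as (b, npoints), both row-major.
#include <cstdio>
#include <vector>

void gather_points_cpu(int b, int c, int n, int npoints,
                       const std::vector<float>& points,
                       const std::vector<int>& idx, std::vector<float>& out) {
  for (int bi = 0; bi < b; ++bi)
    for (int ci = 0; ci < c; ++ci)
      for (int i = 0; i < npoints; ++i)
        out[(bi * c + ci) * npoints + i] =
            points[(bi * c + ci) * n + idx[bi * npoints + i]];
}

int main() {
  // One batch, one channel, four points; gather points 3 and 0.
  std::vector<float> points = {10.f, 11.f, 12.f, 13.f};
  std::vector<int> idx = {3, 0};
  std::vector<float> out(2);
  gather_points_cpu(1, 1, 4, 2, points, idx, out);
  std::printf("%f %f\n", out[0], out[1]);  // 13 10
  return 0;
}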
mmcv/ops/csrc/parrots/group_points.cpp  (view file @ fdeee889)

@@ -3,56 +3,32 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out) {
  DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample,
                       points, idx, out);
}

void group_points_backward_impl(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(group_points_backward_impl, b, c, n, npoints, nsample,
                       grad_out, idx, grad_points);
}

void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
                          Tensor out_tensor, int b, int c, int n, int npoints,
                          int nsample) {
  DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample,
                       points_tensor, idx_tensor, out_tensor);
}

void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                           Tensor grad_points_tensor, int b, int c, int n,
                           int npoints, int nsample) {
  group_points_backward_impl(b, c, n, npoints, nsample, grad_out_tensor,
                             idx_tensor, grad_points_tensor);
}

(Removed here: the #ifdef MMCV_WITH_CUDA declarations of
GroupPointsForwardCUDAKernelLauncher and GroupPointsBackwardCUDAKernelLauncher,
the group_points_forward_cuda / group_points_backward_cuda wrappers, and the
is_cuda() branches with AT_ERROR("group_points is not compiled with GPU
support") / AT_ERROR("group_points is not implemented on CPU").)
mmcv/ops/csrc/parrots/info.cpp  (new file, 0 → 100644; view file @ fdeee889)

// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
#include "pytorch_cpp_helper.hpp"

#ifdef MMCV_WITH_CUDA
#ifndef HIP_DIFF
#include <cuda_runtime_api.h>
int get_cudart_version() { return CUDART_VERSION; }
#endif
#endif

std::string get_compiling_cuda_version() {
#ifdef MMCV_WITH_CUDA
#ifndef HIP_DIFF
  std::ostringstream oss;
  // copied from
  // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
  auto printCudaStyleVersion = [&](int v) {
    oss << (v / 1000) << "." << (v / 10 % 100);
    if (v % 10 != 0) {
      oss << "." << (v % 10);
    }
  };
  printCudaStyleVersion(get_cudart_version());
  return oss.str();
#else
  return std::string("rocm not available");
#endif
#else
  return std::string("not available");
#endif
}

// similar to
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
std::string get_compiler_version() {
  std::ostringstream ss;
#if defined(__GNUC__)
#ifndef __clang__
  { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; }
#endif
#endif

#if defined(__clang_major__)
  {
    ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
       << __clang_patchlevel__;
  }
#endif

#if defined(_MSC_VER)
  { ss << "MSVC " << _MSC_FULL_VER; }
#endif
  return ss.str();
}
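The printCudaStyleVersion lambda above formats the integer CUDART_VERSION as a dotted version string, e.g. 11030 becomes "11.3" and a trailing non-zero digit adds a patch component. A standalone copy of that formatting, runnable without CUDA:

// Standalone copy of the CUDA-style version formatting used above:
// 11030 -> "11.3", 10020 -> "10.2", 11061 -> "11.6.1".
#include <iostream>
#include <sstream>
#include <string>

std::string cuda_style_version(int v) {
  std::ostringstream oss;
  oss << (v / 1000) << "." << (v / 10 % 100);
  if (v % 10 != 0) oss << "." << (v % 10);
  return oss.str();
}

int main() {
  std::cout << cuda_style_version(11030) << "\n";  // 11.3
  std::cout << cuda_style_version(10020) << "\n";  // 10.2
  return 0;
}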
mmcv/ops/csrc/parrots/iou3d.cpp
View file @
fdeee889
@@ -8,225 +8,128 @@ All Rights Reserved 2019-2020.
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;

void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap) {
  DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a,
                       num_b, boxes_b, ans_overlap);
}

void iou3d_nms3d_forward_impl(const Tensor boxes, unsigned long long *mask,
                              int boxes_num, float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms3d_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

void iou3d_nms3d_normal_forward_impl(const Tensor boxes,
                                     unsigned long long *mask, int boxes_num,
                                     float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms3d_normal_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                     Tensor ans_overlap) {
  // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
  // params boxes_b: (M, 5)
  // params ans_overlap: (N, M)
  int num_a = boxes_a.size(0);
  int num_b = boxes_b.size(0);

  iou3d_boxes_overlap_bev_forward_impl(num_a, boxes_a, num_b, boxes_b,
                                       ans_overlap);
}

void iou3d_nms3d_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                         float nms_overlap_thresh) {
  // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
  // params keep: (N)
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  const int col_blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;

  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms3d_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh);

  at::Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long *mask_host =
      (unsigned long long *)mask_cpu.data_ptr<int64_t>();

  std::vector<unsigned long long> remv_cpu(col_blocks);
  memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks);

  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  *keep_num_data = num_to_keep;
}

void iou3d_nms3d_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                                float nms_overlap_thresh) {
  // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
  // params keep: (N)
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  const int col_blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;

  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms3d_normal_forward_impl(boxes, mask_data, boxes_num,
                                  nms_overlap_thresh);

  at::Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long *mask_host =
      (unsigned long long *)mask_cpu.data_ptr<int64_t>();

  std::vector<unsigned long long> remv_cpu(col_blocks);
  memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks);

  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  *keep_num_data = num_to_keep;
}
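Both NMS wrappers share the same host-side bookkeeping: the device kernel (not shown in this file) fills a boxes_num x col_blocks bitmask in which bit k of word (i, j) means box i suppresses box j * 64 + k, and the loop above keeps a box only if no previously kept box has flagged it. A minimal CPU-only sketch of that reduction, with a made-up suppression mask standing in for the kernel output:

// Standalone sketch of the keep/suppress bookkeeping used after the kernel.
#include <cstdio>
#include <vector>

int main() {
  const int kThreads = sizeof(unsigned long long) * 8;  // 64 boxes per word
  const int boxes_num = 5;
  const int col_blocks = (boxes_num + kThreads - 1) / kThreads;  // == 1 here

  // mask[i * col_blocks + j]: bit k set => box i suppresses box j * 64 + k.
  std::vector<unsigned long long> mask(boxes_num * col_blocks, 0ULL);
  mask[0] |= (1ULL << 1) | (1ULL << 3);  // box 0 suppresses boxes 1 and 3
  mask[2] |= (1ULL << 4);                // box 2 suppresses box 4

  std::vector<unsigned long long> remv(col_blocks, 0ULL);
  std::vector<int> keep;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / kThreads, inblock = i % kThreads;
    if (!(remv[nblock] & (1ULL << inblock))) {
      keep.push_back(i);
      for (int j = nblock; j < col_blocks; j++) remv[j] |= mask[i * col_blocks + j];
    }
  }
  for (int i : keep) printf("%d ", i);  // prints "0 2"
  printf("\n");
  return 0;
}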
mmcv/ops/csrc/parrots/iou3d_parrots.cpp
View file @
fdeee889
@@ -8,7 +8,7 @@
using namespace parrots;

#ifdef MMCV_WITH_CUDA
void iou3d_boxes_overlap_bev_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto boxes_a = buildATensor(ctx, ins[0]);
@@ -16,12 +16,12 @@ void iou3d_boxes_iou_bev_forward_cuda_parrots(
  auto ans_iou = buildATensor(ctx, outs[0]);
  iou3d_boxes_overlap_bev_forward(boxes_a, boxes_b, ans_iou);
}

void iou3d_nms3d_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  float nms_overlap_thresh;
  SSAttrs(attr).get<float>("nms_overlap_thresh", nms_overlap_thresh).done();
@@ -30,13 +30,13 @@ void iou3d_nms_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
  auto keep = buildATensor(ctx, outs[0]);
  auto keep_num = buildATensor(ctx, outs[1]);
  iou3d_nms3d_forward(boxes, keep, keep_num, nms_overlap_thresh);
}

void iou3d_nms3d_normal_forward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  float nms_overlap_thresh;
  SSAttrs(attr).get<float>("nms_overlap_thresh", nms_overlap_thresh).done();
@@ -45,26 +45,26 @@ void iou3d_nms_normal_forward_cuda_parrots(CudaContext& ctx,
  auto keep = buildATensor(ctx, outs[0]);
  auto keep_num = buildATensor(ctx, outs[1]);
  iou3d_nms3d_normal_forward(boxes, keep, keep_num, nms_overlap_thresh);
}

PARROTS_EXTENSION_REGISTER(iou3d_boxes_overlap_bev_forward)
    .input(2)
    .output(1)
    .apply(iou3d_boxes_overlap_bev_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(iou3d_nms3d_forward)
    .attr("nms_overlap_thresh")
    .input(1)
    .output(2)
    .apply(iou3d_nms3d_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(iou3d_nms3d_normal_forward)
    .attr("nms_overlap_thresh")
    .input(1)
    .output(2)
    .apply(iou3d_nms3d_normal_forward_cuda_parrots)
    .done();
#endif
mmcv/ops/csrc/parrots/iou3d_pytorch.h
View file @
fdeee889
@@ -4,13 +4,13 @@
#include <torch/extension.h>
using namespace at;

void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                     Tensor ans_overlap);

void iou3d_nms3d_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                         float nms_overlap_thresh);

void iou3d_nms3d_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                                float nms_overlap_thresh);

#endif  // IOU_3D_PYTORCH_H
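For orientation, the renamed entry points above are what the Python bindings call; the caller allocates the keep index buffer and the keep counter itself. A hedged sketch of driving iou3d_nms3d_forward directly from C++, assuming boxes follow the (N, 7) [x, y, z, dx, dy, dz, heading] layout noted in iou3d.cpp and that keep/keep_num are CPU tensors, since the host loop above writes them directly:

// Sketch only: exercising iou3d_nms3d_forward with assumed tensor placement.
#include <torch/extension.h>
#include "iou3d_pytorch.h"

void run_nms3d_example() {
  const int64_t N = 128;
  // Random boxes purely for illustration; real callers pass detector output.
  at::Tensor boxes =
      at::rand({N, 7}, at::device(at::kCUDA).dtype(at::kFloat)).contiguous();
  // Host-side outputs: indices of kept boxes and the count of them.
  at::Tensor keep = at::zeros({N}, at::dtype(at::kLong));
  at::Tensor keep_num = at::zeros({1}, at::dtype(at::kLong));

  iou3d_nms3d_forward(boxes, keep, keep_num, /*nms_overlap_thresh=*/0.5f);

  int64_t kept = keep_num.item<int64_t>();
  at::Tensor kept_idx = keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/kept);
}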
mmcv/ops/csrc/parrots/knn.cpp
View file @
fdeee889
@@ -2,31 +2,16 @@
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2) {
  DISPATCH_DEVICE_IMPL(knn_forward_impl, b, n, m, nsample, xyz, new_xyz, idx,
                       dist2);
}

void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
                 Tensor dist2_tensor, int b, int n, int m, int nsample) {
  knn_forward_impl(b, n, m, nsample, xyz_tensor, new_xyz_tensor, idx_tensor,
                   dist2_tensor);
}
mmcv/ops/csrc/parrots/masked_conv2d.cpp
View file @
fdeee889
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w) {
  // im: (n, ic, h, w), kernel size (kh, kw)
  // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
  DISPATCH_DEVICE_IMPL(masked_im2col_forward_impl, im, mask_h_idx, mask_w_idx,
                       col, kernel_h, kernel_w, pad_h, pad_w);
}

void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels) {
  // im: (n, ic, h, w), kernel size (kh, kw)
  // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh)
  DISPATCH_DEVICE_IMPL(masked_col2im_forward_impl, col, mask_h_idx, mask_w_idx,
                       im, height, width, channels);
}

void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor col,
                           const int kernel_h, const int kernel_w,
                           const int pad_h, const int pad_w) {
  masked_im2col_forward_impl(im, mask_h_idx, mask_w_idx, col, kernel_h,
                             kernel_w, pad_h, pad_w);
}

void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor im, int height,
                           int width, int channels) {
  masked_col2im_forward_impl(col, mask_h_idx, mask_w_idx, im, height, width,
                             channels);
}
mmcv/ops/csrc/parrots/min_area_polygons.cpp
0 → 100644
View file @
fdeee889
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void min_area_polygons_impl(const Tensor pointsets, Tensor polygons) {
  DISPATCH_DEVICE_IMPL(min_area_polygons_impl, pointsets, polygons);
}

void min_area_polygons(const Tensor pointsets, Tensor polygons) {
  min_area_polygons_impl(pointsets, polygons);
}
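min_area_polygons writes its result into a preallocated output tensor. As a rough usage sketch, the shapes below follow the documented Python-level API of mmcv.ops.min_area_polygons, where each point set holds 9 points, (N, 18), and each output polygon is 4 corners, (N, 8); they are assumptions here, not taken from this file:

// Sketch only: buffer shapes assumed from the Python-level documentation.
#include <torch/extension.h>
#include "min_area_polygons_pytorch.h"

void run_min_area_polygons_example() {
  const int64_t N = 16;
  at::Tensor pointsets =
      at::rand({N, 18}, at::device(at::kCUDA).dtype(at::kFloat));
  at::Tensor polygons = at::zeros({N, 8}, pointsets.options());
  min_area_polygons(pointsets, polygons);  // fills polygons in place
}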
mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp
0 → 100644
View file @
fdeee889
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "min_area_polygons_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
void min_area_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  auto pointsets = buildATensor(ctx, ins[0]);
  auto polygons = buildATensor(ctx, outs[0]);
  min_area_polygons(pointsets, polygons);
}

PARROTS_EXTENSION_REGISTER(min_area_polygons)
    .input(1)
    .output(1)
    .apply(min_area_polygons_cuda_parrots)
    .done();
#endif
mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h
0 → 100644
View file @
fdeee889
// Copyright (c) OpenMMLab. All rights reserved
#ifndef MIN_AREA_POLYGONS_PYTORCH_H
#define MIN_AREA_POLYGONS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

void min_area_polygons(const Tensor pointsets, Tensor polygons);

#endif  // MIN_AREA_POLYGONS_PYTORCH_H