OpenDAS / mmdetection3d — Commits

Commit d1aac35d, authored Apr 14, 2020 by zhangwenwei

Initial commit
Changes: 214 files in total; showing 20 changed files with 3053 additions and 0 deletions (+3053, −0):
mmdet3d/ops/iou3d/src/iou3d.cpp                       +179  −0
mmdet3d/ops/iou3d/src/iou3d_kernel.cu                 +381  −0
mmdet3d/ops/norm.py                                   +10   −0
mmdet3d/ops/spconv/__init__.py                        +37   −0
mmdet3d/ops/spconv/conv.py                            +446  −0
mmdet3d/ops/spconv/functional.py                      +98   −0
mmdet3d/ops/spconv/include/paramsgrid.h               +62   −0
mmdet3d/ops/spconv/include/prettyprint.h              +445  −0
mmdet3d/ops/spconv/include/pybind11_utils.h           +61   −0
mmdet3d/ops/spconv/include/spconv/box_iou.h           +157  −0
mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h  +127  −0
mmdet3d/ops/spconv/include/spconv/geometry.h          +301  −0
mmdet3d/ops/spconv/include/spconv/indice.cu.h         +243  −0
mmdet3d/ops/spconv/include/spconv/indice.h            +79   −0
mmdet3d/ops/spconv/include/spconv/maxpool.h           +44   −0
mmdet3d/ops/spconv/include/spconv/mp_helper.h         +47   −0
mmdet3d/ops/spconv/include/spconv/nms.h               +201  −0
mmdet3d/ops/spconv/include/spconv/nms_functor.h       +42   −0
mmdet3d/ops/spconv/include/spconv/nms_gpu.h           +18   −0
mmdet3d/ops/spconv/include/spconv/nms_ops.h           +75   −0
mmdet3d/ops/iou3d/src/iou3d.cpp (new file, mode 100644)
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_ERROR(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;

void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap);
void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou);
void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh);
void nmsNormalLauncher(const float *boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh);

int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){
    // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
    // params boxes_b: (M, 5)
    // params ans_overlap: (N, M)
    CHECK_INPUT(boxes_a);
    CHECK_INPUT(boxes_b);
    CHECK_INPUT(ans_overlap);

    int num_a = boxes_a.size(0);
    int num_b = boxes_b.size(0);

    const float *boxes_a_data = boxes_a.data<float>();
    const float *boxes_b_data = boxes_b.data<float>();
    float *ans_overlap_data = ans_overlap.data<float>();

    boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_overlap_data);

    return 1;
}

int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou){
    // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
    // params boxes_b: (M, 5)
    // params ans_overlap: (N, M)
    CHECK_INPUT(boxes_a);
    CHECK_INPUT(boxes_b);
    CHECK_INPUT(ans_iou);

    int num_a = boxes_a.size(0);
    int num_b = boxes_b.size(0);

    const float *boxes_a_data = boxes_a.data<float>();
    const float *boxes_b_data = boxes_b.data<float>();
    float *ans_iou_data = ans_iou.data<float>();

    boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data);

    return 1;
}

int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
    // params boxes: (N, 5) [x1, y1, x2, y2, ry]
    // params keep: (N)
    CHECK_INPUT(boxes);
    CHECK_CONTIGUOUS(keep);

    int boxes_num = boxes.size(0);
    const float *boxes_data = boxes.data<float>();
    long *keep_data = keep.data<long>();

    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);

    unsigned long long *mask_data = NULL;
    CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
    nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);

    // unsigned long long mask_cpu[boxes_num * col_blocks];
    // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
    std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);

    // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
    CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
                           cudaMemcpyDeviceToHost));

    cudaFree(mask_data);

    unsigned long long remv_cpu[col_blocks];
    memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));

    int num_to_keep = 0;

    for (int i = 0; i < boxes_num; i++){
        int nblock = i / THREADS_PER_BLOCK_NMS;
        int inblock = i % THREADS_PER_BLOCK_NMS;

        if (!(remv_cpu[nblock] & (1ULL << inblock))){
            keep_data[num_to_keep++] = i;
            unsigned long long *p = &mask_cpu[0] + i * col_blocks;
            for (int j = nblock; j < col_blocks; j++){
                remv_cpu[j] |= p[j];
            }
        }
    }
    if (cudaSuccess != cudaGetLastError()) printf("Error!\n");

    return num_to_keep;
}

int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
    // params boxes: (N, 5) [x1, y1, x2, y2, ry]
    // params keep: (N)
    CHECK_INPUT(boxes);
    CHECK_CONTIGUOUS(keep);

    int boxes_num = boxes.size(0);
    const float *boxes_data = boxes.data<float>();
    long *keep_data = keep.data<long>();

    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);

    unsigned long long *mask_data = NULL;
    CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
    nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);

    // unsigned long long mask_cpu[boxes_num * col_blocks];
    // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
    std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);

    // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
    CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
                           cudaMemcpyDeviceToHost));

    cudaFree(mask_data);

    unsigned long long remv_cpu[col_blocks];
    memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));

    int num_to_keep = 0;

    for (int i = 0; i < boxes_num; i++){
        int nblock = i / THREADS_PER_BLOCK_NMS;
        int inblock = i % THREADS_PER_BLOCK_NMS;

        if (!(remv_cpu[nblock] & (1ULL << inblock))){
            keep_data[num_to_keep++] = i;
            unsigned long long *p = &mask_cpu[0] + i * col_blocks;
            for (int j = nblock; j < col_blocks; j++){
                remv_cpu[j] |= p[j];
            }
        }
    }
    if (cudaSuccess != cudaGetLastError()) printf("Error!\n");

    return num_to_keep;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu, "oriented boxes overlap");
    m.def("boxes_iou_bev_gpu", &boxes_iou_bev_gpu, "oriented boxes iou");
    m.def("nms_gpu", &nms_gpu, "oriented nms gpu");
    m.def("nms_normal_gpu", &nms_normal_gpu, "nms gpu");
}
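Once compiled as a torch extension, these bindings are driven from Python. A minimal usage sketch, assuming the extension is built under the name iou3d_cuda (the actual module name comes from the build setup, which is not part of this diff). Note that keep is a preallocated CPU buffer filled in place, and boxes are expected pre-sorted by score so that earlier boxes win:

import torch
import iou3d_cuda  # assumed extension name

boxes = torch.rand(100, 5).cuda()           # (N, 5) [x1, y1, x2, y2, ry], sorted by score
keep = torch.zeros(100, dtype=torch.long)   # output buffer, filled in place
num_out = iou3d_cuda.nms_gpu(boxes, keep, 0.7)
kept_inds = keep[:num_out]                  # indices of surviving boxes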
mmdet3d/ops/iou3d/src/iou3d_kernel.cu (new file, mode 100644)
#include <stdio.h>
#define THREADS_PER_BLOCK 16
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
//#define DEBUG
const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
const float EPS = 1e-8;

struct Point {
    float x, y;
    __device__ Point() {}
    __device__ Point(double _x, double _y){
        x = _x, y = _y;
    }

    __device__ void set(float _x, float _y){
        x = _x; y = _y;
    }

    __device__ Point operator +(const Point &b)const{
        return Point(x + b.x, y + b.y);
    }

    __device__ Point operator -(const Point &b)const{
        return Point(x - b.x, y - b.y);
    }
};

__device__ inline float cross(const Point &a, const Point &b){
    return a.x * b.y - a.y * b.x;
}

__device__ inline float cross(const Point &p1, const Point &p2, const Point &p0){
    return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
}

__device__ int check_rect_cross(const Point &p1, const Point &p2, const Point &q1, const Point &q2){
    int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) &&
              min(q1.x, q2.x) <= max(p1.x, p2.x) &&
              min(p1.y, p2.y) <= max(q1.y, q2.y) &&
              min(q1.y, q2.y) <= max(p1.y, p2.y);
    return ret;
}

__device__ inline int check_in_box2d(const float *box, const Point &p){
    // params: box (5) [x1, y1, x2, y2, angle]
    const float MARGIN = 1e-5;
    float center_x = (box[0] + box[2]) / 2;
    float center_y = (box[1] + box[3]) / 2;
    // rotate the point in the opposite direction of box
    float angle_cos = cos(-box[4]), angle_sin = sin(-box[4]);
    float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * angle_sin + center_x;
    float rot_y = -(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y;
#ifdef DEBUG
    printf("box: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", box[0], box[1], box[2], box[3], box[4]);
    printf("center: (%.3f, %.3f), cossin(%.3f, %.3f), src(%.3f, %.3f), rot(%.3f, %.3f)\n",
           center_x, center_y, angle_cos, angle_sin, p.x, p.y, rot_x, rot_y);
#endif
    return (rot_x > box[0] - MARGIN && rot_x < box[2] + MARGIN &&
            rot_y > box[1] - MARGIN && rot_y < box[3] + MARGIN);
}

__device__ inline int intersection(const Point &p1, const Point &p0, const Point &q1, const Point &q0, Point &ans){
    // fast exclusion
    if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;

    // check cross standing
    float s1 = cross(q0, p1, p0);
    float s2 = cross(p1, q1, p0);
    float s3 = cross(p0, q1, q0);
    float s4 = cross(q1, p1, q0);

    if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;

    // calculate intersection of two lines
    float s5 = cross(q1, p1, p0);
    if (fabs(s5 - s1) > EPS){
        ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
        ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
    }
    else{
        float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
        float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
        float D = a0 * b1 - a1 * b0;

        ans.x = (b0 * c1 - b1 * c0) / D;
        ans.y = (a1 * c0 - a0 * c1) / D;
    }

    return 1;
}

__device__ inline void rotate_around_center(const Point &center, const float angle_cos, const float angle_sin, Point &p){
    float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * angle_sin + center.x;
    float new_y = -(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
    p.set(new_x, new_y);
}

__device__ inline int point_cmp(const Point &a, const Point &b, const Point &center){
    return atan2(a.y - center.y, a.x - center.x) > atan2(b.y - center.y, b.x - center.x);
}

__device__ inline float box_overlap(const float *box_a, const float *box_b){
    // params: box_a (5) [x1, y1, x2, y2, angle]
    // params: box_b (5) [x1, y1, x2, y2, angle]
    float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3], a_angle = box_a[4];
    float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3], b_angle = box_b[4];

    Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2);
    Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2);
#ifdef DEBUG
    printf("a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n",
           a_x1, a_y1, a_x2, a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
    printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n",
           center_a.x, center_a.y, center_b.x, center_b.y);
#endif

    Point box_a_corners[5];
    box_a_corners[0].set(a_x1, a_y1);
    box_a_corners[1].set(a_x2, a_y1);
    box_a_corners[2].set(a_x2, a_y2);
    box_a_corners[3].set(a_x1, a_y2);

    Point box_b_corners[5];
    box_b_corners[0].set(b_x1, b_y1);
    box_b_corners[1].set(b_x2, b_y1);
    box_b_corners[2].set(b_x2, b_y2);
    box_b_corners[3].set(b_x1, b_y2);

    // get oriented corners
    float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
    float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);

    for (int k = 0; k < 4; k++){
#ifdef DEBUG
        printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f)\n", k,
               box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
#endif
        rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
        rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
#ifdef DEBUG
        printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f)\n", k,
               box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
#endif
    }

    box_a_corners[4] = box_a_corners[0];
    box_b_corners[4] = box_b_corners[0];

    // get intersection of lines
    Point cross_points[16];
    Point poly_center;
    int cnt = 0, flag = 0;

    poly_center.set(0, 0);
    for (int i = 0; i < 4; i++){
        for (int j = 0; j < 4; j++){
            flag = intersection(box_a_corners[i + 1], box_a_corners[i],
                                box_b_corners[j + 1], box_b_corners[j], cross_points[cnt]);
            if (flag){
                poly_center = poly_center + cross_points[cnt];
                cnt++;
            }
        }
    }

    // check corners
    for (int k = 0; k < 4; k++){
        if (check_in_box2d(box_a, box_b_corners[k])){
            poly_center = poly_center + box_b_corners[k];
            cross_points[cnt] = box_b_corners[k];
            cnt++;
        }
        if (check_in_box2d(box_b, box_a_corners[k])){
            poly_center = poly_center + box_a_corners[k];
            cross_points[cnt] = box_a_corners[k];
            cnt++;
        }
    }

    poly_center.x /= cnt;
    poly_center.y /= cnt;

    // sort the points of polygon
    Point temp;
    for (int j = 0; j < cnt - 1; j++){
        for (int i = 0; i < cnt - j - 1; i++){
            if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)){
                temp = cross_points[i];
                cross_points[i] = cross_points[i + 1];
                cross_points[i + 1] = temp;
            }
        }
    }

#ifdef DEBUG
    printf("cnt=%d\n", cnt);
    for (int i = 0; i < cnt; i++){
        printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x, cross_points[i].y);
    }
#endif

    // get the overlap areas
    float area = 0;
    for (int k = 0; k < cnt - 1; k++){
        area += cross(cross_points[k] - cross_points[0], cross_points[k + 1] - cross_points[0]);
    }

    return fabs(area) / 2.0;
}

__device__ inline float iou_bev(const float *box_a, const float *box_b){
    // params: box_a (5) [x1, y1, x2, y2, angle]
    // params: box_b (5) [x1, y1, x2, y2, angle]
    float sa = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]);
    float sb = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]);
    float s_overlap = box_overlap(box_a, box_b);
    return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
}

__global__ void boxes_overlap_kernel(const int num_a, const float *boxes_a,
                                     const int num_b, const float *boxes_b, float *ans_overlap){
    const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
    const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;

    if (a_idx >= num_a || b_idx >= num_b){
        return;
    }
    const float *cur_box_a = boxes_a + a_idx * 5;
    const float *cur_box_b = boxes_b + b_idx * 5;
    float s_overlap = box_overlap(cur_box_a, cur_box_b);
    ans_overlap[a_idx * num_b + b_idx] = s_overlap;
}

__global__ void boxes_iou_bev_kernel(const int num_a, const float *boxes_a,
                                     const int num_b, const float *boxes_b, float *ans_iou){
    const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
    const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;

    if (a_idx >= num_a || b_idx >= num_b){
        return;
    }

    const float *cur_box_a = boxes_a + a_idx * 5;
    const float *cur_box_b = boxes_b + b_idx * 5;
    float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
    ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
}

__global__ void nms_kernel(const int boxes_num, const float nms_overlap_thresh,
                           const float *boxes, unsigned long long *mask){
    // params: boxes (N, 5) [x1, y1, x2, y2, ry]
    // params: mask (N, N/THREADS_PER_BLOCK_NMS)
    const int row_start = blockIdx.y;
    const int col_start = blockIdx.x;

    // if (row_start > col_start) return;

    const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
    const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);

    __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];

    if (threadIdx.x < col_size) {
        block_boxes[threadIdx.x * 5 + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0];
        block_boxes[threadIdx.x * 5 + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1];
        block_boxes[threadIdx.x * 5 + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2];
        block_boxes[threadIdx.x * 5 + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3];
        block_boxes[threadIdx.x * 5 + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4];
    }
    __syncthreads();

    if (threadIdx.x < row_size) {
        const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
        const float *cur_box = boxes + cur_box_idx * 5;

        int i = 0;
        unsigned long long t = 0;
        int start = 0;
        if (row_start == col_start) {
            start = threadIdx.x + 1;
        }
        for (i = start; i < col_size; i++) {
            if (iou_bev(cur_box, block_boxes + i * 5) > nms_overlap_thresh){
                t |= 1ULL << i;
            }
        }
        const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
        mask[cur_box_idx * col_blocks + col_start] = t;
    }
}

__device__ inline float iou_normal(float const *const a, float const *const b) {
    float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
    float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
    float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
    float interS = width * height;
    float Sa = (a[2] - a[0]) * (a[3] - a[1]);
    float Sb = (b[2] - b[0]) * (b[3] - b[1]);
    return interS / fmaxf(Sa + Sb - interS, EPS);
}

__global__ void nms_normal_kernel(const int boxes_num, const float nms_overlap_thresh,
                                  const float *boxes, unsigned long long *mask){
    // params: boxes (N, 5) [x1, y1, x2, y2, ry]
    // params: mask (N, N/THREADS_PER_BLOCK_NMS)
    const int row_start = blockIdx.y;
    const int col_start = blockIdx.x;

    // if (row_start > col_start) return;

    const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
    const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);

    __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];

    if (threadIdx.x < col_size) {
        block_boxes[threadIdx.x * 5 + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0];
        block_boxes[threadIdx.x * 5 + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1];
        block_boxes[threadIdx.x * 5 + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2];
        block_boxes[threadIdx.x * 5 + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3];
        block_boxes[threadIdx.x * 5 + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4];
    }
    __syncthreads();

    if (threadIdx.x < row_size) {
        const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
        const float *cur_box = boxes + cur_box_idx * 5;

        int i = 0;
        unsigned long long t = 0;
        int start = 0;
        if (row_start == col_start) {
            start = threadIdx.x + 1;
        }
        for (i = start; i < col_size; i++) {
            if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh){
                t |= 1ULL << i;
            }
        }
        const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
        mask[cur_box_idx * col_blocks + col_start] = t;
    }
}

void boxesoverlapLauncher(const int num_a, const float *boxes_a,
                          const int num_b, const float *boxes_b, float *ans_overlap){
    dim3 blocks(DIVUP(num_b, THREADS_PER_BLOCK), DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);

    boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_overlap);
#ifdef DEBUG
    cudaDeviceSynchronize();  // for using printf in kernel function
#endif
}

void boxesioubevLauncher(const int num_a, const float *boxes_a,
                         const int num_b, const float *boxes_b, float *ans_iou){
    dim3 blocks(DIVUP(num_b, THREADS_PER_BLOCK), DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);

    boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_iou);
}

void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh){
    dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
                DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
    dim3 threads(THREADS_PER_BLOCK_NMS);
    nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
}

void nmsNormalLauncher(const float *boxes, unsigned long long *mask, int boxes_num, float nms_overlap_thresh){
    dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
                DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
    dim3 threads(THREADS_PER_BLOCK_NMS);
    nms_normal_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
}
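For intuition about the 64-bit mask layout the kernels produce, here is a NumPy sketch (not part of the repo) of the effective suppression relation and the host-side decode loop from iou3d.cpp. It keeps only the "j suppressed by an earlier box" bits, which is all the host pass actually consumes; iou is assumed to be a precomputed (N, N) IoU matrix:

import numpy as np

def nms_masks(iou, thresh):
    # bit (i % 64) of mask[cur, i // 64] means: box `cur` suppresses box i
    n = iou.shape[0]
    col_blocks = (n + 63) // 64
    mask = np.zeros((n, col_blocks), dtype=np.uint64)
    for cur in range(n):
        for i in range(cur + 1, n):  # later boxes only, like the kernel's start index
            if iou[cur, i] > thresh:
                mask[cur, i // 64] |= np.uint64(1) << np.uint64(i % 64)
    return mask

def nms_keep(mask, n):
    # mirrors the remv_cpu loop in nms_gpu(): scan in score order,
    # keep a box unless an already-kept box has flagged its bit
    remv = np.zeros(mask.shape[1], dtype=np.uint64)
    keep = []
    for i in range(n):
        block, bit = divmod(i, 64)
        if not (int(remv[block]) >> bit) & 1:
            keep.append(i)
            remv |= mask[i]
    return keep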
mmdet3d/ops/norm.py (new file, mode 100644)
import torch.nn as nn
from mmdet.ops.norm import norm_cfg

from .sync_bn import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d

norm_cfg.update({
    'BN1d': ('bn', nn.BatchNorm1d),
    'naiveSyncBN2d': ('bn', NaiveSyncBatchNorm2d),
    'naiveSyncBN1d': ('bn', NaiveSyncBatchNorm1d),
})
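Since norm_cfg maps a type name to an (abbreviation, layer class) pair, a registered layer can be built by plain lookup. A minimal sketch; it assumes NaiveSyncBatchNorm1d follows nn.BatchNorm1d's constructor signature, which this diff does not show:

from mmdet3d.ops.norm import norm_cfg  # path per this commit

abbr, layer_cls = norm_cfg['naiveSyncBN1d']  # ('bn', NaiveSyncBatchNorm1d)
bn = layer_cls(64)                           # 64-channel 1d batch norm (assumed signature)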
mmdet3d/ops/spconv/__init__.py (new file, mode 100644)
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d,
                   SparseConvTranspose3d, SparseInverseConv2d,
                   SparseInverseConv3d, SubMConv2d, SubMConv3d)
from .modules import SparseModule, SparseSequential
from .pool import SparseMaxPool2d, SparseMaxPool3d
from .structure import SparseConvTensor, scatter_nd

__all__ = [
    'SparseConv2d',
    'SparseConv3d',
    'SubMConv2d',
    'SubMConv3d',
    'SparseConvTranspose2d',
    'SparseConvTranspose3d',
    'SparseInverseConv2d',
    'SparseInverseConv3d',
    'SparseModule',
    'SparseSequential',
    'SparseMaxPool2d',
    'SparseMaxPool3d',
    'SparseConvTensor',
    'scatter_nd',
]
mmdet3d/ops/spconv/conv.py (new file, mode 100644)
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import numpy as np
import torch
from torch.nn import init
from torch.nn.parameter import Parameter

from . import functional as Fsp
from . import ops
from .modules import SparseModule
from .structure import SparseConvTensor


def _calculate_fan_in_and_fan_out_hwio(tensor):
    dimensions = tensor.ndimension()
    if dimensions < 2:
        raise ValueError('fan in and fan out can not be computed for tensor '
                         'with fewer than 2 dimensions')

    if dimensions == 2:  # Linear
        fan_in = tensor.size(-2)
        fan_out = tensor.size(-1)
    else:
        num_input_fmaps = tensor.size(-2)
        num_output_fmaps = tensor.size(-1)
        receptive_field_size = 1
        if tensor.dim() > 2:
            receptive_field_size = tensor[..., 0, 0].numel()
        fan_in = num_input_fmaps * receptive_field_size
        fan_out = num_output_fmaps * receptive_field_size

    return fan_in, fan_out


class SparseConvolution(SparseModule):

    def __init__(self,
                 ndim,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 subm=False,
                 output_padding=0,
                 transposed=False,
                 inverse=False,
                 indice_key=None,
                 fused_bn=False):
        super(SparseConvolution, self).__init__()
        assert groups == 1
        if not isinstance(kernel_size, (list, tuple)):
            kernel_size = [kernel_size] * ndim
        if not isinstance(stride, (list, tuple)):
            stride = [stride] * ndim
        if not isinstance(padding, (list, tuple)):
            padding = [padding] * ndim
        if not isinstance(dilation, (list, tuple)):
            dilation = [dilation] * ndim
        if not isinstance(output_padding, (list, tuple)):
            output_padding = [output_padding] * ndim

        for d, s in zip(dilation, stride):
            assert any([s == 1, d == 1]), "don't support this."

        self.ndim = ndim
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.conv1x1 = np.prod(kernel_size) == 1
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.inverse = inverse
        self.output_padding = output_padding
        self.groups = groups
        self.subm = subm
        self.indice_key = indice_key
        self.fused_bn = fused_bn

        self.weight = Parameter(
            torch.Tensor(*kernel_size, in_channels, out_channels))
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = _calculate_fan_in_and_fan_out_hwio(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input):
        assert isinstance(input, SparseConvTensor)
        features = input.features
        device = features.device
        indices = input.indices
        spatial_shape = input.spatial_shape
        batch_size = input.batch_size
        if not self.subm:
            if self.transposed:
                out_spatial_shape = ops.get_deconv_output_size(
                    spatial_shape, self.kernel_size, self.stride,
                    self.padding, self.dilation, self.output_padding)
            else:
                out_spatial_shape = ops.get_conv_output_size(
                    spatial_shape, self.kernel_size, self.stride,
                    self.padding, self.dilation)
        else:
            out_spatial_shape = spatial_shape
        # input.update_grid(out_spatial_shape)
        # t = time.time()

        if self.conv1x1:
            features = torch.mm(
                input.features,
                self.weight.view(self.in_channels, self.out_channels))
            if self.bias is not None:
                features += self.bias
            out_tensor = SparseConvTensor(features, input.indices,
                                          input.spatial_shape,
                                          input.batch_size)
            out_tensor.indice_dict = input.indice_dict
            out_tensor.grid = input.grid
            return out_tensor
        datas = input.find_indice_pair(self.indice_key)
        if self.inverse:
            assert datas is not None and self.indice_key is not None
            _, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas
            assert indice_pairs.shape[0] == np.prod(self.kernel_size), \
                'inverse conv must have same kernel size as its couple conv'
        else:
            if self.indice_key is not None and datas is not None:
                outids, _, indice_pairs, indice_pair_num, _ = datas
            else:
                outids, indice_pairs, indice_pair_num = ops.get_indice_pairs(
                    indices,
                    batch_size,
                    spatial_shape,
                    self.kernel_size,
                    self.stride,
                    self.padding,
                    self.dilation,
                    self.output_padding,
                    self.subm,
                    self.transposed,
                    grid=input.grid)
                input.indice_dict[self.indice_key] = (outids, indices,
                                                      indice_pairs,
                                                      indice_pair_num,
                                                      spatial_shape)
        if self.fused_bn:
            assert self.bias is not None
            out_features = ops.fused_indice_conv(features, self.weight,
                                                 self.bias,
                                                 indice_pairs.to(device),
                                                 indice_pair_num,
                                                 outids.shape[0],
                                                 self.inverse, self.subm)
        else:
            if self.subm:
                out_features = Fsp.indice_subm_conv(features, self.weight,
                                                    indice_pairs.to(device),
                                                    indice_pair_num,
                                                    outids.shape[0])
            else:
                if self.inverse:
                    out_features = Fsp.indice_inverse_conv(
                        features, self.weight, indice_pairs.to(device),
                        indice_pair_num, outids.shape[0])
                else:
                    out_features = Fsp.indice_conv(features, self.weight,
                                                   indice_pairs.to(device),
                                                   indice_pair_num,
                                                   outids.shape[0])

            if self.bias is not None:
                out_features += self.bias
        out_tensor = SparseConvTensor(out_features, outids,
                                      out_spatial_shape, batch_size)
        out_tensor.indice_dict = input.indice_dict
        out_tensor.grid = input.grid
        return out_tensor


class SparseConv2d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SparseConv2d, self).__init__(
            2,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            indice_key=indice_key)


class SparseConv3d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SparseConv3d, self).__init__(
            3,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            indice_key=indice_key)


class SparseConv4d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SparseConv4d, self).__init__(
            4,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            indice_key=indice_key)


class SparseConvTranspose2d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SparseConvTranspose2d, self).__init__(
            2,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            transposed=True,
            indice_key=indice_key)


class SparseConvTranspose3d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SparseConvTranspose3d, self).__init__(
            3,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            transposed=True,
            indice_key=indice_key)


class SparseInverseConv2d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 indice_key,
                 bias=True):
        super(SparseInverseConv2d, self).__init__(
            2,
            in_channels,
            out_channels,
            kernel_size,
            bias=bias,
            inverse=True,
            indice_key=indice_key)


class SparseInverseConv3d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 indice_key,
                 bias=True):
        super(SparseInverseConv3d, self).__init__(
            3,
            in_channels,
            out_channels,
            kernel_size,
            bias=bias,
            inverse=True,
            indice_key=indice_key)


class SubMConv2d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SubMConv2d, self).__init__(
            2,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            True,
            indice_key=indice_key)


class SubMConv3d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SubMConv3d, self).__init__(
            3,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            True,
            indice_key=indice_key)


class SubMConv4d(SparseConvolution):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 indice_key=None):
        super(SubMConv4d, self).__init__(
            4,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            True,
            indice_key=indice_key)
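A usage sketch for the layer family above, assuming the compiled spconv ops are available. The SparseConvTensor argument order (features, indices, spatial_shape, batch_size) is inferred from its use in forward() above; the indices layout (batch index plus one coordinate per spatial dim, int32) follows spconv convention and is an assumption here:

import torch
from mmdet3d.ops import spconv

features = torch.randn(4, 16).cuda()  # 4 active voxels, 16 channels each
indices = torch.tensor([[0, 0, 0, 0],
                        [0, 1, 2, 3],
                        [0, 4, 4, 4],
                        [0, 7, 6, 5]], dtype=torch.int32).cuda()  # (batch, z, y, x)
x = spconv.SparseConvTensor(features, indices, [8, 8, 8], 1)

# Submanifold conv keeps the active-site pattern; only features change.
conv = spconv.SubMConv3d(16, 32, 3, padding=1, indice_key='subm1').cuda()
y = conv(x)  # y.features: (4, 32)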
mmdet3d/ops/spconv/functional.py (new file, mode 100644)
# Copyright 2019 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from torch.autograd import Function

from . import ops as ops


class SparseConvFunction(Function):

    @staticmethod
    def forward(ctx, features, filters, indice_pairs, indice_pair_num,
                num_activate_out):
        ctx.save_for_backward(indice_pairs, indice_pair_num, features,
                              filters)
        return ops.indice_conv(features, filters, indice_pairs,
                               indice_pair_num, num_activate_out, False)

    @staticmethod
    def backward(ctx, grad_output):
        indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
        input_bp, filters_bp = ops.indice_conv_backward(
            features, filters, grad_output, indice_pairs, indice_pair_num,
            False)

        return input_bp, filters_bp, None, None, None


class SparseInverseConvFunction(Function):

    @staticmethod
    def forward(ctx, features, filters, indice_pairs, indice_pair_num,
                num_activate_out):
        ctx.save_for_backward(indice_pairs, indice_pair_num, features,
                              filters)
        return ops.indice_conv(features, filters, indice_pairs,
                               indice_pair_num, num_activate_out, True, False)

    @staticmethod
    def backward(ctx, grad_output):
        indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
        input_bp, filters_bp = ops.indice_conv_backward(
            features, filters, grad_output, indice_pairs, indice_pair_num,
            True, False)

        return input_bp, filters_bp, None, None, None


class SubMConvFunction(Function):

    @staticmethod
    def forward(ctx, features, filters, indice_pairs, indice_pair_num,
                num_activate_out):
        ctx.save_for_backward(indice_pairs, indice_pair_num, features,
                              filters)
        return ops.indice_conv(features, filters, indice_pairs,
                               indice_pair_num, num_activate_out, False, True)

    @staticmethod
    def backward(ctx, grad_output):
        indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
        input_bp, filters_bp = ops.indice_conv_backward(
            features, filters, grad_output, indice_pairs, indice_pair_num,
            False, True)

        return input_bp, filters_bp, None, None, None


class SparseMaxPoolFunction(Function):

    @staticmethod
    def forward(ctx, features, indice_pairs, indice_pair_num,
                num_activate_out):
        out = ops.indice_maxpool(features, indice_pairs, indice_pair_num,
                                 num_activate_out)
        ctx.save_for_backward(indice_pairs, indice_pair_num, features, out)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        indice_pairs, indice_pair_num, features, out = ctx.saved_tensors
        input_bp = ops.indice_maxpool_backward(features, out, grad_output,
                                               indice_pairs, indice_pair_num)
        return input_bp, None, None, None


indice_conv = SparseConvFunction.apply
indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv = SubMConvFunction.apply
indice_maxpool = SparseMaxPoolFunction.apply
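These classes follow the standard torch.autograd.Function contract: forward stashes what backward needs via save_for_backward, and backward returns exactly one gradient per forward input, with None for the index tensors that are not differentiated. A self-contained toy analogue of the same pattern (not part of the repo):

import torch
from torch.autograd import Function

class ScaleBy(Function):

    @staticmethod
    def forward(ctx, x, w, meta):
        ctx.save_for_backward(x, w)
        return x * w  # `meta` plays the role of indice_pairs: consumed, not differentiated

    @staticmethod
    def backward(ctx, grad_output):
        x, w = ctx.saved_tensors
        # One return value per forward input: grads for x and w, None for meta.
        return grad_output * w, grad_output * x, None

scale = ScaleBy.apply  # mirrors `indice_conv = SparseConvFunction.apply`

x = torch.randn(3, requires_grad=True)
w = torch.randn(3, requires_grad=True)
scale(x, w, None).sum().backward()  # x.grad == w, w.grad == x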
mmdet3d/ops/spconv/include/paramsgrid.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PARAMS_GRID_H_
#define PARAMS_GRID_H_
#include <tuple>
#include <vector>
namespace detail {
template <class T> int getTotalSize(std::vector<T> arg) { return arg.size(); }

template <class T, class... TArgs>
int getTotalSize(std::vector<T> arg, std::vector<TArgs>... args) {
  return arg.size() * getTotalSize(args...);
}
template <typename T> int getSize(std::vector<T> arg) { return arg.size(); }

template <int Idx, class TT, class T>
void assigner(TT &src, std::vector<int> counter, std::vector<T> &arg) {
  std::get<Idx>(src) = arg[counter[Idx]];
}

template <int Idx, class TT, class T, class... TArgs>
void assigner(TT &src, std::vector<int> counter, std::vector<T> &arg,
              std::vector<TArgs> &... args) {
  std::get<Idx>(src) = arg[counter[Idx]];
  assigner<Idx + 1>(src, counter, args...);
}
} // namespace detail

template <class... TArgs>
std::vector<std::tuple<TArgs...>> paramsGrid(std::vector<TArgs>... args) {
  int length = detail::getTotalSize(args...);
  std::vector<int> sizes = {detail::getSize(args)...};
  int size = sizes.size();

  std::vector<std::tuple<TArgs...>> params(length);
  std::vector<int> counter(size);
  for (int i = 0; i < length; ++i) {
    detail::assigner<0>(params[i], counter, args...);
    counter[size - 1] += 1;
    for (int c = size - 1; c >= 0; --c) {
      if (counter[c] == sizes[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return params;
}

#endif
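paramsGrid enumerates the Cartesian product of its input vectors, with the last argument varying fastest (the counter carries leftward like an odometer). In Python terms, the same grid is simply:

import itertools

grid = list(itertools.product([1, 2], ['a', 'b'], [0.1, 0.2]))
# 2 * 2 * 2 = 8 tuples, matching getTotalSize(); same ordering as paramsGrid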
mmdet3d/ops/spconv/include/prettyprint.h (new file, mode 100644)
// Copyright Louis Delacroix 2010 - 2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// A pretty printing library for C++
//
// Usage:
// Include this header, and operator<< will "just work".
#ifndef H_PRETTY_PRINT
#define H_PRETTY_PRINT
#include <cstddef>
#include <iterator>
#include <memory>
#include <ostream>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <valarray>
namespace pretty_print
{
    namespace detail
    {
        // SFINAE type trait to detect whether T::const_iterator exists.

        struct sfinae_base
        {
            using yes = char;
            using no = yes[2];
        };

        template <typename T>
        struct has_const_iterator : private sfinae_base
        {
        private:
            template <typename C> static yes & test(typename C::const_iterator *);
            template <typename C> static no & test(...);
        public:
            static const bool value = sizeof(test<T>(nullptr)) == sizeof(yes);
            using type = T;
        };

        template <typename T>
        struct has_begin_end : private sfinae_base
        {
        private:
            template <typename C>
            static yes & f(typename std::enable_if<
                std::is_same<decltype(static_cast<typename C::const_iterator (C::*)() const>(&C::begin)),
                             typename C::const_iterator (C::*)() const>::value>::type *);

            template <typename C> static no & f(...);

            template <typename C>
            static yes & g(typename std::enable_if<
                std::is_same<decltype(static_cast<typename C::const_iterator (C::*)() const>(&C::end)),
                             typename C::const_iterator (C::*)() const>::value, void>::type *);

            template <typename C> static no & g(...);

        public:
            static bool const beg_value = sizeof(f<T>(nullptr)) == sizeof(yes);
            static bool const end_value = sizeof(g<T>(nullptr)) == sizeof(yes);
        };

    }  // namespace detail


    // Holds the delimiter values for a specific character type

    template <typename TChar>
    struct delimiters_values
    {
        using char_type = TChar;
        const char_type * prefix;
        const char_type * delimiter;
        const char_type * postfix;
    };


    // Defines the delimiter values for a specific container and character type

    template <typename T, typename TChar>
    struct delimiters
    {
        using type = delimiters_values<TChar>;
        static const type values;
    };


    // Functor to print containers. You can use this directly if you want
    // to specificy a non-default delimiters type. The printing logic can
    // be customized by specializing the nested template.

    template <typename T,
              typename TChar = char,
              typename TCharTraits = ::std::char_traits<TChar>,
              typename TDelimiters = delimiters<T, TChar>>
    struct print_container_helper
    {
        using delimiters_type = TDelimiters;
        using ostream_type = std::basic_ostream<TChar, TCharTraits>;

        template <typename U>
        struct printer
        {
            static void print_body(const U & c, ostream_type & stream)
            {
                using std::begin;
                using std::end;

                auto it = begin(c);
                const auto the_end = end(c);

                if (it != the_end)
                {
                    for ( ; ; )
                    {
                        stream << *it;

                        if (++it == the_end) break;

                        if (delimiters_type::values.delimiter != NULL)
                            stream << delimiters_type::values.delimiter;
                    }
                }
            }
        };

        print_container_helper(const T & container)
            : container_(container)
        { }

        inline void operator()(ostream_type & stream) const
        {
            if (delimiters_type::values.prefix != NULL)
                stream << delimiters_type::values.prefix;

            printer<T>::print_body(container_, stream);

            if (delimiters_type::values.postfix != NULL)
                stream << delimiters_type::values.postfix;
        }

    private:
        const T & container_;
    };


    // Specialization for pairs

    template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
    template <typename T1, typename T2>
    struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::pair<T1, T2>>
    {
        using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;

        static void print_body(const std::pair<T1, T2> & c, ostream_type & stream)
        {
            stream << c.first;
            if (print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter != NULL)
                stream << print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter;
            stream << c.second;
        }
    };


    // Specialization for tuples

    template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
    template <typename ...Args>
    struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::tuple<Args...>>
    {
        using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
        using element_type = std::tuple<Args...>;

        template <std::size_t I> struct Int { };

        static void print_body(const element_type & c, ostream_type & stream)
        {
            tuple_print(c, stream, Int<0>());
        }

        static void tuple_print(const element_type &, ostream_type &, Int<sizeof...(Args)>)
        {
        }

        static void tuple_print(const element_type & c, ostream_type & stream,
                                typename std::conditional<sizeof...(Args) != 0, Int<0>, std::nullptr_t>::type)
        {
            stream << std::get<0>(c);
            tuple_print(c, stream, Int<1>());
        }

        template <std::size_t N>
        static void tuple_print(const element_type & c, ostream_type & stream, Int<N>)
        {
            if (print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter != NULL)
                stream << print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter;

            stream << std::get<N>(c);

            tuple_print(c, stream, Int<N + 1>());
        }
    };


    // Prints a print_container_helper to the specified stream.

    template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
    inline std::basic_ostream<TChar, TCharTraits> & operator<<(
        std::basic_ostream<TChar, TCharTraits> & stream,
        const print_container_helper<T, TChar, TCharTraits, TDelimiters> & helper)
    {
        helper(stream);
        return stream;
    }


    // Basic is_container template; specialize to derive from std::true_type for all desired container types

    template <typename T>
    struct is_container : public std::integral_constant<bool,
                                                        detail::has_const_iterator<T>::value &&
                                                        detail::has_begin_end<T>::beg_value &&
                                                        detail::has_begin_end<T>::end_value> { };

    template <typename T, std::size_t N>
    struct is_container<T[N]> : std::true_type { };

    template <std::size_t N>
    struct is_container<char[N]> : std::false_type { };

    template <typename T>
    struct is_container<std::valarray<T>> : std::true_type { };

    template <typename T1, typename T2>
    struct is_container<std::pair<T1, T2>> : std::true_type { };

    template <typename ...Args>
    struct is_container<std::tuple<Args...>> : std::true_type { };


    // Default delimiters

    template <typename T> struct delimiters<T, char> { static const delimiters_values<char> values; };
    template <typename T> const delimiters_values<char> delimiters<T, char>::values = { "[", ", ", "]" };
    template <typename T> struct delimiters<T, wchar_t> { static const delimiters_values<wchar_t> values; };
    template <typename T> const delimiters_values<wchar_t> delimiters<T, wchar_t>::values = { L"[", L", ", L"]" };


    // Delimiters for (multi)set and unordered_(multi)set

    template <typename T, typename TComp, typename TAllocator>
    struct delimiters< ::std::set<T, TComp, TAllocator>, char> { static const delimiters_values<char> values; };

    template <typename T, typename TComp, typename TAllocator>
    const delimiters_values<char> delimiters< ::std::set<T, TComp, TAllocator>, char>::values = { "{", ", ", "}" };

    template <typename T, typename TComp, typename TAllocator>
    struct delimiters< ::std::set<T, TComp, TAllocator>, wchar_t> { static const delimiters_values<wchar_t> values; };

    template <typename T, typename TComp, typename TAllocator>
    const delimiters_values<wchar_t> delimiters< ::std::set<T, TComp, TAllocator>, wchar_t>::values = { L"{", L", ", L"}" };

    template <typename T, typename TComp, typename TAllocator>
    struct delimiters< ::std::multiset<T, TComp, TAllocator>, char> { static const delimiters_values<char> values; };

    template <typename T, typename TComp, typename TAllocator>
    const delimiters_values<char> delimiters< ::std::multiset<T, TComp, TAllocator>, char>::values = { "{", ", ", "}" };

    template <typename T, typename TComp, typename TAllocator>
    struct delimiters< ::std::multiset<T, TComp, TAllocator>, wchar_t> { static const delimiters_values<wchar_t> values; };

    template <typename T, typename TComp, typename TAllocator>
    const delimiters_values<wchar_t> delimiters< ::std::multiset<T, TComp, TAllocator>, wchar_t>::values = { L"{", L", ", L"}" };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    struct delimiters< ::std::unordered_set<T, THash, TEqual, TAllocator>, char> { static const delimiters_values<char> values; };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    const delimiters_values<char> delimiters< ::std::unordered_set<T, THash, TEqual, TAllocator>, char>::values = { "{", ", ", "}" };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    struct delimiters< ::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t> { static const delimiters_values<wchar_t> values; };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    const delimiters_values<wchar_t> delimiters< ::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t>::values = { L"{", L", ", L"}" };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    struct delimiters< ::std::unordered_multiset<T, THash, TEqual, TAllocator>, char> { static const delimiters_values<char> values; };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    const delimiters_values<char> delimiters< ::std::unordered_multiset<T, THash, TEqual, TAllocator>, char>::values = { "{", ", ", "}" };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    struct delimiters< ::std::unordered_multiset<T, THash, TEqual, TAllocator>, wchar_t> { static const delimiters_values<wchar_t> values; };

    template <typename T, typename THash, typename TEqual, typename TAllocator>
    const delimiters_values<wchar_t> delimiters< ::std::unordered_multiset<T, THash, TEqual, TAllocator>, wchar_t>::values = { L"{", L", ", L"}" };


    // Delimiters for pair and tuple

    template <typename T1, typename T2>
    struct delimiters<std::pair<T1, T2>, char> { static const delimiters_values<char> values; };

    template <typename T1, typename T2>
    const delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = { "(", ", ", ")" };

    template <typename T1, typename T2>
    struct delimiters< ::std::pair<T1, T2>, wchar_t> { static const delimiters_values<wchar_t> values; };

    template <typename T1, typename T2>
    const delimiters_values<wchar_t> delimiters< ::std::pair<T1, T2>, wchar_t>::values = { L"(", L", ", L")" };

    template <typename ...Args>
    struct delimiters<std::tuple<Args...>, char> { static const delimiters_values<char> values; };

    template <typename ...Args>
    const delimiters_values<char> delimiters<std::tuple<Args...>, char>::values = { "(", ", ", ")" };

    template <typename ...Args>
    struct delimiters< ::std::tuple<Args...>, wchar_t> { static const delimiters_values<wchar_t> values; };

    template <typename ...Args>
    const delimiters_values<wchar_t> delimiters< ::std::tuple<Args...>, wchar_t>::values = { L"(", L", ", L")" };


    // Type-erasing helper class for easy use of custom delimiters.
    // Requires TCharTraits = std::char_traits<TChar> and TChar = char or wchar_t, and MyDelims needs to be defined for TChar.
    // Usage: "cout << pretty_print::custom_delims<MyDelims>(x)".

    struct custom_delims_base
    {
        virtual ~custom_delims_base() { }
        virtual std::ostream & stream(::std::ostream &) = 0;
        virtual std::wostream & stream(::std::wostream &) = 0;
    };

    template <typename T, typename Delims>
    struct custom_delims_wrapper : custom_delims_base
    {
        custom_delims_wrapper(const T & t_) : t(t_) { }

        std::ostream & stream(std::ostream & s)
        {
            return s << print_container_helper<T, char, std::char_traits<char>, Delims>(t);
        }

        std::wostream & stream(std::wostream & s)
        {
            return s << print_container_helper<T, wchar_t, std::char_traits<wchar_t>, Delims>(t);
        }

    private:
        const T & t;
    };

    template <typename Delims>
    struct custom_delims
    {
        template <typename Container>
        custom_delims(const Container & c) : base(new custom_delims_wrapper<Container, Delims>(c)) { }

        std::unique_ptr<custom_delims_base> base;
    };

    template <typename TChar, typename TCharTraits, typename Delims>
    inline std::basic_ostream<TChar, TCharTraits> & operator<<(
        std::basic_ostream<TChar, TCharTraits> & s, const custom_delims<Delims> & p)
    {
        return p.base->stream(s);
    }


    // A wrapper for a C-style array given as pointer-plus-size.
    // Usage: std::cout << pretty_print_array(arr, n) << std::endl;

    template <typename T>
    struct array_wrapper_n
    {
        typedef const T * const_iterator;
        typedef T value_type;

        array_wrapper_n(const T * const a, size_t n) : _array(a), _n(n) { }
        inline const_iterator begin() const { return _array; }
        inline const_iterator end() const { return _array + _n; }

    private:
        const T * const _array;
        size_t _n;
    };


    // A wrapper for hash-table based containers that offer local iterators to each bucket.
    // Usage: std::cout << bucket_print(m, 4) << std::endl;  (Prints bucket 5 of container m.)

    template <typename T>
    struct bucket_print_wrapper
    {
        typedef typename T::const_local_iterator const_iterator;
        typedef typename T::size_type size_type;

        const_iterator begin() const
        {
            return m_map.cbegin(n);
        }

        const_iterator end() const
        {
            return m_map.cend(n);
        }

        bucket_print_wrapper(const T & m, size_type bucket) : m_map(m), n(bucket) { }

    private:
        const T & m_map;
        const size_type n;
    };

}  // namespace pretty_print


// Global accessor functions for the convenience wrappers

template <typename T>
inline pretty_print::array_wrapper_n<T> pretty_print_array(const T * const a, size_t n)
{
    return pretty_print::array_wrapper_n<T>(a, n);
}

template <typename T>
pretty_print::bucket_print_wrapper<T> bucket_print(const T & m, typename T::size_type n)
{
    return pretty_print::bucket_print_wrapper<T>(m, n);
}


// Main magic entry point: An overload snuck into namespace std.
// Can we do better?

namespace std
{
    // Prints a container to the stream using default delimiters

    template <typename T, typename TChar, typename TCharTraits>
    inline typename enable_if< ::pretty_print::is_container<T>::value,
                               basic_ostream<TChar, TCharTraits> &>::type
    operator<<(basic_ostream<TChar, TCharTraits> & stream, const T & container)
    {
        return stream << ::pretty_print::print_container_helper<T, TChar, TCharTraits>(container);
    }
}

#endif  // H_PRETTY_PRINT
mmdet3d/ops/spconv/include/pybind11_utils.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <iostream>
#include <pybind11/embed.h> // everything needed for embedding
#include <pybind11/functional.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <tensorview/tensorview.h>
namespace py = pybind11;

template <typename T, typename TPyObject>
std::vector<T> array2Vector(TPyObject arr) {
  py::array arr_np = arr;
  size_t size = arr.attr("size").template cast<size_t>();
  py::array_t<T> arr_cc = arr_np;
  std::vector<T> data(arr_cc.data(), arr_cc.data() + size);
  return data;
}

template <typename T>
std::vector<T> arrayT2Vector(py::array_t<T> arr) {
  std::vector<T> data(arr.data(), arr.data() + arr.size());
  return data;
}

template <typename T, typename TPyObject>
tv::TensorView<T> array2TensorView(TPyObject arr) {
  py::array arr_np = arr;
  py::array_t<T> arr_cc = arr_np;
  tv::Shape shape;
  for (int i = 0; i < arr_cc.ndim(); ++i) {
    shape.push_back(arr_cc.shape(i));
  }
  return tv::TensorView<T>(arr_cc.mutable_data(), shape);
}

template <typename T>
tv::TensorView<T> arrayT2TensorView(py::array_t<T> arr) {
  tv::Shape shape;
  for (int i = 0; i < arr.ndim(); ++i) {
    shape.push_back(arr.shape(i));
  }
  return tv::TensorView<T>(arr.mutable_data(), shape);
}
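A sketch of how these converters might be used inside a pybind11 binding (illustrative only; the module and function names are hypothetical, and tensorview must be on the include path):

#include <numeric>
#include "pybind11_utils.h"

// Sum a 1-D float64 numpy array by copying it into a std::vector first.
double sum_array(py::array_t<double> arr) {
  std::vector<double> v = arrayT2Vector<double>(arr);
  return std::accumulate(v.begin(), v.end(), 0.0);
}

PYBIND11_MODULE(example_utils, m) {  // hypothetical module name
  m.def("sum_array", &sum_array, "sum a float64 numpy array");
}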
mmdet3d/ops/spconv/include/spconv/box_iou.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BOX_IOU_H
#define BOX_IOU_H
#include <pybind11/pybind11.h>
// must include pybind11/eigen.h if using eigen matrix as arguments.
#include <algorithm>
#include <boost/geometry.hpp>
#include <pybind11/numpy.h>
namespace spconv {
// #include "voxelnet/core/cc/pybind11_helper.h"
namespace py = pybind11;
using namespace pybind11::literals;

template <typename DType, typename ShapeContainer>
inline py::array_t<DType> constant(ShapeContainer shape, DType value) {
  // create ROWMAJOR array.
  py::array_t<DType> array(shape);
  std::fill(array.mutable_data(), array.mutable_data() + array.size(), value);
  return array;
}

template <typename DType>
inline py::array_t<DType> zeros(std::vector<long int> shape) {
  return constant<DType, std::vector<long int>>(shape, 0);
}

template <typename DType>
py::array_t<DType> rbbox_iou(py::array_t<DType> box_corners,
                             py::array_t<DType> qbox_corners,
                             py::array_t<DType> standup_iou,
                             DType standup_thresh) {
  namespace bg = boost::geometry;
  typedef bg::model::point<DType, 2, bg::cs::cartesian> point_t;
  typedef bg::model::polygon<point_t> polygon_t;
  polygon_t poly, qpoly;
  std::vector<polygon_t> poly_inter, poly_union;
  DType inter_area, union_area;
  auto box_corners_r = box_corners.template unchecked<3>();
  auto qbox_corners_r = qbox_corners.template unchecked<3>();
  auto standup_iou_r = standup_iou.template unchecked<2>();
  auto N = box_corners_r.shape(0);
  auto K = qbox_corners_r.shape(0);
  py::array_t<DType> overlaps = zeros<DType>({int(N), int(K)});
  auto overlaps_rw = overlaps.template mutable_unchecked<2>();
  if (N == 0 || K == 0) {
    return overlaps;
  }
  for (int k = 0; k < K; ++k) {
    for (int n = 0; n < N; ++n) {
      if (standup_iou_r(n, k) <= standup_thresh) continue;
      bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
      bg::append(poly, point_t(box_corners_r(n, 1, 0), box_corners_r(n, 1, 1)));
      bg::append(poly, point_t(box_corners_r(n, 2, 0), box_corners_r(n, 2, 1)));
      bg::append(poly, point_t(box_corners_r(n, 3, 0), box_corners_r(n, 3, 1)));
      bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 0, 0), qbox_corners_r(k, 0, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 1, 0), qbox_corners_r(k, 1, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 2, 0), qbox_corners_r(k, 2, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 3, 0), qbox_corners_r(k, 3, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 0, 0), qbox_corners_r(k, 0, 1)));
      bg::intersection(poly, qpoly, poly_inter);
      if (!poly_inter.empty()) {
        inter_area = bg::area(poly_inter.front());
        bg::union_(poly, qpoly, poly_union);
        if (!poly_union.empty()) {
          union_area = bg::area(poly_union.front());
          overlaps_rw(n, k) = inter_area / union_area;
        }
        poly_union.clear();
      }
      poly.clear();
      qpoly.clear();
      poly_inter.clear();
    }
  }
  return overlaps;
}

template <typename DType>
py::array_t<DType> rbbox_intersection(py::array_t<DType> box_corners,
                                      py::array_t<DType> qbox_corners,
                                      py::array_t<DType> standup_iou,
                                      DType standup_thresh) {
  namespace bg = boost::geometry;
  typedef bg::model::point<DType, 2, bg::cs::cartesian> point_t;
  typedef bg::model::polygon<point_t> polygon_t;
  polygon_t poly, qpoly;
  std::vector<polygon_t> poly_inter, poly_union;
  DType inter_area, union_area;
  auto box_corners_r = box_corners.template unchecked<3>();
  auto qbox_corners_r = qbox_corners.template unchecked<3>();
  auto standup_iou_r = standup_iou.template unchecked<2>();
  auto N = box_corners_r.shape(0);
  auto K = qbox_corners_r.shape(0);
  py::array_t<DType> overlaps = zeros<DType>({int(N), int(K)});
  auto overlaps_rw = overlaps.template mutable_unchecked<2>();
  if (N == 0 || K == 0) {
    return overlaps;
  }
  for (int k = 0; k < K; ++k) {
    for (int n = 0; n < N; ++n) {
      if (standup_iou_r(n, k) <= standup_thresh) continue;
      bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
      bg::append(poly, point_t(box_corners_r(n, 1, 0), box_corners_r(n, 1, 1)));
      bg::append(poly, point_t(box_corners_r(n, 2, 0), box_corners_r(n, 2, 1)));
      bg::append(poly, point_t(box_corners_r(n, 3, 0), box_corners_r(n, 3, 1)));
      bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 0, 0), qbox_corners_r(k, 0, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 1, 0), qbox_corners_r(k, 1, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 2, 0), qbox_corners_r(k, 2, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 3, 0), qbox_corners_r(k, 3, 1)));
      bg::append(qpoly, point_t(qbox_corners_r(k, 0, 0), qbox_corners_r(k, 0, 1)));
      bg::intersection(poly, qpoly, poly_inter);
      if (!poly_inter.empty()) {
        inter_area = bg::area(poly_inter.front());
        overlaps_rw(n, k) = inter_area;
      }
      poly.clear();
      qpoly.clear();
      poly_inter.clear();
    }
  }
  return overlaps;
}
}  // namespace spconv
#endif
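Note that standup_iou acts as a prefilter in both functions: the expensive polygon clipping only runs for pairs whose axis-aligned ("standup") IoU exceeds standup_thresh. A standalone sketch of the core polygon-IoU step, under the same Boost.Geometry conventions used above (closed, clockwise rings; illustrative only):

#include <boost/geometry.hpp>
#include <iostream>
#include <vector>

int main() {
  namespace bg = boost::geometry;
  typedef bg::model::point<double, 2, bg::cs::cartesian> point_t;
  typedef bg::model::polygon<point_t> polygon_t;
  // Two unit squares offset by 0.5 in x.
  polygon_t a, b;
  bg::append(a, point_t(0.0, 0.0));
  bg::append(a, point_t(0.0, 1.0));
  bg::append(a, point_t(1.0, 1.0));
  bg::append(a, point_t(1.0, 0.0));
  bg::append(a, point_t(0.0, 0.0));  // close the ring
  bg::append(b, point_t(0.5, 0.0));
  bg::append(b, point_t(0.5, 1.0));
  bg::append(b, point_t(1.5, 1.0));
  bg::append(b, point_t(1.5, 0.0));
  bg::append(b, point_t(0.5, 0.0));
  std::vector<polygon_t> inter, uni;
  bg::intersection(a, b, inter);
  bg::union_(a, b, uni);
  double iou = bg::area(inter.front()) / bg::area(uni.front());
  std::cout << "IoU = " << iou << std::endl;  // 0.5 / 1.5 = 1/3
  return 0;
}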
mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FUSED_SPARSE_CONV_OP_H_
#define FUSED_SPARSE_CONV_OP_H_
#include <cuda_runtime_api.h>
#include <spconv/indice.h>
#include <spconv/reordering.h>
#include <torch/script.h>
#include <torch_utils.h>
#include <utility/timer.h>
namespace spconv {
// torch.jit's doc says only support int64, so we need to convert to int32.
template <typename T>
torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor filters,
                                       torch::Tensor bias, torch::Tensor indicePairs,
                                       torch::Tensor indiceNum, int64_t numActOut,
                                       int64_t _inverse, int64_t _subM) {
  bool subM = _subM != 0;
  bool inverse = _inverse != 0;
  auto device = features.device().type();
  auto ndim = filters.dim() - 2;
  auto kernelVolume = indicePairs.size(0);
  auto numInPlanes = features.size(1);
  auto numOutPlanes = filters.size(ndim + 1);
  auto indicePairNumCpu = indiceNum.to({torch::kCPU});
  auto indicePairMaxSizeIter = std::max_element(
      indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
  int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumCpu.data<int>();
  int indicePairMaxSize = *indicePairMaxSizeIter;
  /*if (_subM){
  std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
  indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
  auto indicePairVecMaxSizeIter = std::max_element(
      indicePairNumVec.begin(), indicePairNumVec.end());
  indicePairMaxSize = *indicePairVecMaxSizeIter;
  }*/
  auto options = torch::TensorOptions().dtype(features.dtype()).device(features.device());
  // auto indicePairOptions =
  //     torch::TensorOptions().dtype(torch::kInt64).device(indicePairs.device());
  torch::Tensor output = torch::zeros({numActOut, numOutPlanes}, options).copy_(bias);
  torch::Tensor inputBuffer = torch::zeros({indicePairMaxSize, numInPlanes}, options);
  torch::Tensor outputBuffer = torch::zeros({indicePairMaxSize, numOutPlanes}, options);
  filters = filters.view({-1, numInPlanes, numOutPlanes});
  if (subM) {
    // the center index of subm conv doesn't need gather and scatter add.
    torch::mm_out(output, features, filters[indicePairMaxOffset]);
  }
  double totalGatherTime = 0;
  double totalGEMMTime = 0;
  double totalSAddTime = 0;
  for (int i = 0; i < kernelVolume; ++i) {
    auto nHot = indicePairNumCpu.data<int>()[i];
    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
      continue;
    }
    // auto timer = spconv::CudaContextTimer<>();
    auto outputBufferBlob =
        torch::from_blob(outputBuffer.data<T>(), {nHot, numOutPlanes}, options);
    auto inputBufferBlob =
        torch::from_blob(inputBuffer.data<T>(), {nHot, numInPlanes}, options);
    if (device == torch::kCPU) {
      functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;
      gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer), tv::torch2tv<const T>(features),
                 tv::torch2tv<const int>(indicePairs).subview(i, inverse), nHot);
    } else {
      functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;
      gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),
                 tv::torch2tv<const T>(features),
                 tv::torch2tv<const int>(indicePairs).subview(i, inverse), nHot);
      TV_CHECK_CUDA_ERR();
      /* slower than SparseGatherFunctor, may be due to int->long conversion
      auto indicePairLong = indicePairs[i][inverse].to(torch::kInt64);
      auto indicePairBlob = torch::from_blob(indicePairLong.data<long>(), {nHot},
      indicePairOptions);
      torch::index_select_out(inputBufferBlob, features, 0,
      indicePairBlob);*/
    }
    // totalGatherTime += timer.report() / 1000.0;
    torch::mm_out(outputBufferBlob, inputBufferBlob, filters[i]);
    // totalGEMMTime += timer.report() / 1000.0;
    if (device == torch::kCPU) {
      functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;
      scatterFtor(tv::CPU(), tv::torch2tv<T>(output), tv::torch2tv<const T>(outputBuffer),
                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse), nHot, true);
    } else {
      functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;
      scatterFtor(tv::TorchGPU(), tv::torch2tv<T>(output),
                  tv::torch2tv<const T>(outputBuffer),
                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse), nHot, true);
      TV_CHECK_CUDA_ERR();
    }
    // totalSAddTime += timer.report() / 1000.0;
  }
  // std::cout << "gather time " << totalGatherTime << std::endl;
  // std::cout << "gemm time " << totalGEMMTime << std::endl;
  // std::cout << "scatteradd time " << totalSAddTime << std::endl;
  return output;
}
}  // namespace spconv
#endif
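The loop above is the classic gather-GEMM-scatter formulation of sparse convolution: for each kernel offset i, the nHot participating input rows are gathered into a dense buffer, multiplied by filters[i], and scatter-added into the output, with the bias pre-broadcast via copy_(bias). A dependency-free CPU sketch of the same idea (illustrative only; plain arrays stand in for torch tensors):

#include <vector>

// One kernel offset: inPairs[p] / outPairs[p] map buffer row p to feature rows.
// features: [numIn][cIn], filter: [cIn][cOut], output: [numOut][cOut].
void gather_gemm_scatter(const std::vector<std::vector<float>> &features,
                         const std::vector<std::vector<float>> &filter,
                         const std::vector<int> &inPairs,
                         const std::vector<int> &outPairs,
                         std::vector<std::vector<float>> &output) {
  const size_t cIn = filter.size(), cOut = filter[0].size();
  for (size_t p = 0; p < inPairs.size(); ++p) {
    const std::vector<float> &in = features[inPairs[p]];  // gather
    std::vector<float> &out = output[outPairs[p]];
    for (size_t o = 0; o < cOut; ++o) {  // the GEMM, row by row
      float acc = 0.f;
      for (size_t c = 0; c < cIn; ++c) acc += in[c] * filter[c][o];
      out[o] += acc;  // scatter-add
    }
  }
}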
mmdet3d/ops/spconv/include/spconv/geometry.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPCONV_GEOMETRY_H_
#define SPCONV_GEOMETRY_H_
#include <iostream>
#include <limits>
#include <tensorview/tensorview.h>
namespace spconv {
template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos, const Index *kernelSize,
                                    const Index *stride, const Index *padding,
                                    const Index *dilation, const Index *outSpatialShape,
                                    Index *out) {
  Index lowers[NDim];
  Index uppers[NDim];
  Index counter[NDim];
  Index counterSize[NDim];
  Index pointCounter = 0;
  Index val;
  Index numPoints = 1;
  Index m, offset;
  bool valid = false;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 + stride[i] +
                 padding[i]) /
                stride[i];
    uppers[i] = (input_pos[i] + padding[i]) / stride[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
    numPoints *= counterSize[i];
  }
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    counter[i] = 0;
  }
  for (int i = 0; i < numPoints; ++i) {
    valid = true;
    m = 1;
    offset = 0;
#pragma unroll
    for (int j = NDim - 1; j >= 0; --j) {
      val = uppers[j] - counter[j] * dilation[j];
      out[pointCounter * (NDim + 1) + j] = val;
      if (val < 0 || (val > outSpatialShape[j] - 1)) {
        valid = false;
        // break;
      }
      offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];
      m *= kernelSize[j];
    }
    out[pointCounter * (NDim + 1) + NDim] = offset;
    if (valid) ++pointCounter;
    counter[NDim - 1] += 1;
#pragma unroll
    for (int c = NDim - 1; c >= 0; --c) {
      if (counter[c] == counterSize[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return pointCounter;
}

template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPosTranspose(const Index *input_pos,
                                             const Index *kernelSize, const Index *stride,
                                             const Index *padding, const Index *dilation,
                                             const Index *outSpatialShape, Index *out) {
  Index lowers[NDim];
  Index uppers[NDim];
  Index counter[NDim];
  Index counterSize[NDim];
  Index pointCounter = 0;
  Index val;
  Index numPoints = 1;
  Index m, offset;
  bool valid = false;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    lowers[i] = input_pos[i] * stride[i] - padding[i];
    uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
    numPoints *= counterSize[i];
  }
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    counter[i] = 0;
  }
  for (int i = 0; i < numPoints; ++i) {
    valid = true;
    m = 1;
    offset = 0;
#pragma unroll
    for (int j = NDim - 1; j >= 0; --j) {
      val = uppers[j] - counter[j] * dilation[j];
      out[pointCounter * (NDim + 1) + j] = val;
      if (val < 0 || (val > outSpatialShape[j] - 1)) {
        valid = false;
        // break;
      }
      offset += m * (val - lowers[j]) / dilation[j];
      m *= kernelSize[j];
    }
    out[pointCounter * (NDim + 1) + NDim] = offset;
    if (valid) ++pointCounter;
    counter[NDim - 1] += 1;
#pragma unroll
    for (int c = NDim - 1; c >= 0; --c) {
      if (counter[c] == counterSize[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return pointCounter;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<Index> indicesOut,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum, const Index *kernelSize,
                         const Index *stride, const Index *padding, const Index *dilation,
                         const Index *outSpatialShape) {
  // indicesOut: num_active * kernelVolume * (NDim + 1)
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, dilation,
        outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      if (gridsOut[index] == -1) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        gridsOut[index] = numAct++;
      }
      // indicePairs: [K, 2, L]
      indicePairs(offset, 0, indiceNum[offset]) = j;
      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
    }
  }
  return numAct;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
                           tv::TensorView<Index> indicesOut,
                           tv::TensorView<IndexGrid> gridsOut,
                           tv::TensorView<Index> indicePairs,
                           tv::TensorView<Index> indiceNum, const Index *kernelSize,
                           const Index *stride, const Index *padding,
                           const Index *dilation, const Index *outSpatialShape) {
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPosTranspose<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, dilation,
        outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      if (gridsOut[index] == -1) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        gridsOut[index] = numAct++;
      }
      // indicePairs: [K, 2, L]
      indicePairs(offset, 0, indiceNum[offset]) = j;
      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
    }
  }
  return numAct;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *const kernelSize, const Index *const stride,
                         const Index *const padding, const Index *dilation,
                         const Index *const outSpatialShape) {
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  // Index validPoints[kernelVolume * (NDim + 1)];
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  Index index = 0;
  for (int j = 0; j < numActIn; ++j) {
    index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,
                                         outSpatialShape) +
            spatialVolume * indicesIn(j, 0);
    gridsOut[index] = j;
  }
  for (int j = 0; j < numActIn; ++j) {
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, dilation,
        outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
              spatialVolume * indicesIn(j, 0);
      if (gridsOut[index] > -1) {
        indicePairs(offset, 0, indiceNum[offset]) = j;
        indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
      }
    }
  }
  return numActIn;
}
}  // namespace spconv
#endif
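getValidOutPos is the heart of this header: given one active input site, it enumerates every output position a kernel tap can reach, writing NDim coordinates plus the flattened kernel offset per candidate, and returns how many land inside the output shape. A 1-D sketch of the same bound arithmetic (illustrative only; mirrors the lowers/uppers formulas above):

#include <cstdio>

int main() {
  // 1-D conv: kernel k = 3, stride s = 1, padding p = 1, output length 6.
  // Which outputs can the input at x = 4 contribute to, and via which tap?
  int x = 4, k = 3, s = 1, p = 1, d = 1, outLen = 6;
  int lower = (x - (k - 1) * d - 1 + s + p) / s;  // same formula as lowers[i]: 3
  int upper = (x + p) / s;                        // same formula as uppers[i]: 5
  for (int o = upper; o >= lower; o -= d) {
    if (o < 0 || o > outLen - 1) continue;  // clip to the output shape
    int tap = (x - o * s + p) / d;          // kernel tap index, as in the offset sum
    std::printf("output %d via kernel tap %d\n", o, tap);  // (5,0) (4,1) (3,2)
  }
  return 0;
}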
mmdet3d/ops/spconv/include/spconv/indice.cu.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef INDICE_CU_H_
#define INDICE_CU_H_
#include <tensorview/tensorview.h>
#include <tensorview/helper_kernel.cu.h>
#include <spconv/geometry.h>
namespace spconv {
template <typename Index, typename IndexGrid, unsigned NDim, int KernelMaxVolume = 256>
__global__ void prepareIndicePairsKernel(
    tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,
    tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
    tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique,
    const tv::SimpleVector<Index, NDim> kernelSize,
    const tv::SimpleVector<Index, NDim> stride,
    const tv::SimpleVector<Index, NDim> padding,
    const tv::SimpleVector<Index, NDim> dilation,
    const tv::SimpleVector<Index, NDim> outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  Index validPoints[KernelMaxVolume * (NDim + 1)];
  Index *pointPtr = nullptr;
  auto indicePairsDim2 = indicePairs.dim(2);
  Index index;
  for (int ix : tv::KernelLoopX<int>(numActIn)) {
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), stride.data(),
        padding.data(), dilation.data(), outSpatialShape.data(), validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
      indicePairs(offset, 0, oldNum) = ix;
      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +
              spatialVolume * indicesIn(ix, 0);
      indicePairs(offset, 1, oldNum) = index;
      indicePairUnique[offset * indicePairsDim2 + oldNum] = index;
    }
  }
}

template <typename Index, typename IndexGrid, unsigned NDim, int KernelMaxVolume = 256>
__global__ void prepareDeConvIndicePairsKernel(
    tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,
    tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
    tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique,
    const tv::SimpleVector<Index, NDim> kernelSize,
    const tv::SimpleVector<Index, NDim> stride,
    const tv::SimpleVector<Index, NDim> padding,
    const tv::SimpleVector<Index, NDim> dilation,
    const tv::SimpleVector<Index, NDim> outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  Index validPoints[KernelMaxVolume * (NDim + 1)];
  Index *pointPtr = nullptr;
  auto indicePairsDim2 = indicePairs.dim(2);
  Index index;
  for (int ix : tv::KernelLoopX<int>(numActIn)) {
    numValidPoints = getValidOutPosTranspose<Index, NDim>(
        indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), stride.data(),
        padding.data(), dilation.data(), outSpatialShape.data(), validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
      indicePairs(offset, 0, oldNum) = ix;
      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +
              spatialVolume * indicesIn(ix, 0);
      indicePairs(offset, 1, oldNum) = index;
      indicePairUnique[offset * indicePairsDim2 + oldNum] = index;
    }
  }
}

template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void assignGridAndIndiceOutKernel(
    tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut, int numAct,
    tv::TensorView<Index> indicePairs, tv::TensorView<Index> indicePairUnique,
    const tv::SimpleVector<Index, NDim> outSpatialShape, int batchSize) {
  Index index;
  auto indicesOutPtr = indicesOut.data();
  for (int ix : tv::KernelLoopX<int>(numAct)) {
    index = indicePairUnique[ix];
    gridsOut[index] = ix;
    index = tv::rowArrayIdxInv<Index, NDim>(
        index, indicesOutPtr + ix * (NDim + 1) + 1, outSpatialShape.data());
    indicesOut[ix * (NDim + 1)] = index % batchSize;
  }
}

template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void assignIndicePairsKernel(
    tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut, int numActIn,
    tv::TensorView<Index> indicePairs, tv::TensorView<Index> indicePairUnique,
    const tv::SimpleVector<Index, NDim> outSpatialShape) {
  Index index;
  int kernelVolume = indicePairs.dim(0);
  for (int ix : tv::KernelLoopX<int>(numActIn)) {
    for (int i = 0; i < kernelVolume; ++i) {
      index = indicePairs(i, 1, ix);
      if (index > -1) {
        indicePairs(i, 1, ix) = gridsOut[index];
      }
    }
  }
}

template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void prepareSubMGridKernel(
    tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut,
    const tv::SimpleVector<Index, NDim> outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index index = 0;
  for (int ix : tv::KernelLoopX<int>(numActIn)) {
    index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + ix * (NDim + 1) + 1,
                                         outSpatialShape.data()) +
            spatialVolume * indicesIn(ix, 0);
    gridsOut[index] = ix;
  }
}

template <typename Index, typename IndexGrid, unsigned NDim, int KernelMaxVolume = 256>
__global__ void getSubMIndicePairsKernel(
    tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut,
    tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
    const tv::SimpleVector<Index, NDim> kernelSize,
    const tv::SimpleVector<Index, NDim> stride,
    const tv::SimpleVector<Index, NDim> padding,
    const tv::SimpleVector<Index, NDim> dilation,
    const tv::SimpleVector<Index, NDim> outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index numValidPoints = 0;
  Index validPoints[KernelMaxVolume * (NDim + 1)];
  Index *pointPtr = nullptr;
  Index index = 0;
  for (int ix : tv::KernelLoopX<int>(numActIn)) {
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), stride.data(),
        padding.data(), dilation.data(), outSpatialShape.data(), validPoints);
    for (int i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +
              spatialVolume * indicesIn(ix, 0);
      if (gridsOut[index] > -1) {
        auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
        indicePairs(offset, 1, oldNum) = gridsOut[index];
        indicePairs(offset, 0, oldNum) = ix;
      }
    }
  }
}

template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void resetGridKernel(const Index *indicePairUnique,
                                tv::TensorView<IndexGrid> gridsOut, int numAct) {
  for (int ix : tv::KernelLoopX<int>(numAct)) {
    gridsOut[indicePairUnique[ix]] = -1;
  }
}

template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void resetGridSubMKernel(const Index *indices,
                                    tv::TensorView<IndexGrid> gridsOut,
                                    const tv::SimpleVector<Index, NDim> outSpatialShape,
                                    int numAct) {
  int outSpatialShapeReg[NDim];
  for (int i = 0; i < NDim; ++i) {
    outSpatialShapeReg[i] = outSpatialShape[i];
  }
  Index spatialVolume = 1;
  auto indsPtr = indices;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index index;
  for (int ix : tv::KernelLoopX<int>(numAct)) {
    indsPtr = indices + ix * (NDim + 1);
    index = tv::rowArrayIdx<Index, NDim>(indsPtr + 1, outSpatialShapeReg);
    gridsOut[index + spatialVolume * indsPtr[0]] = -1;
  }
}
}  // namespace spconv
#endif
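These kernels build the per-offset pair lists concurrently: each thread reserves a write slot with atomicAdd on that offset's counter, so threads never collide (at the cost of nondeterministic slot order). A minimal CUDA sketch of that append-by-atomicAdd pattern in isolation (illustrative only):

#include <cstdio>
#include <cuda_runtime.h>

// Compact the even values of in[] into out[], in nondeterministic order.
__global__ void compactEven(const int *in, int n, int *out, int *counter) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= n) return;
  if (in[i] % 2 == 0) {
    int slot = atomicAdd(counter, 1);  // reserve a unique output slot
    out[slot] = in[i];
  }
}

int main() {
  int h_in[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  int *d_in, *d_out, *d_cnt;
  cudaMalloc(&d_in, sizeof(h_in));
  cudaMalloc(&d_out, sizeof(h_in));
  cudaMalloc(&d_cnt, sizeof(int));
  cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice);
  cudaMemset(d_cnt, 0, sizeof(int));
  compactEven<<<1, 8>>>(d_in, 8, d_out, d_cnt);
  int h_cnt = 0;
  cudaMemcpy(&h_cnt, d_cnt, sizeof(int), cudaMemcpyDeviceToHost);
  std::printf("kept %d values\n", h_cnt);  // 4
  cudaFree(d_in);
  cudaFree(d_out);
  cudaFree(d_cnt);
  return 0;
}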
mmdet3d/ops/spconv/include/spconv/indice.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_CONV_INDICE_FUNCTOR_H_
#define SPARSE_CONV_INDICE_FUNCTOR_H_
#include <tensorview/tensorview.h>
namespace spconv {
namespace functor {
template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP1 {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
                   tv::TensorView<Index> indicePairUnique,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP2 {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
                   tv::TensorView<Index> indicePairUnique,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose,
                   bool resetGrid = false);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctor {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose,
                   bool resetGrid = false);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateSubMIndicePairFunctor {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose,
                   bool resetGrid = false);
};
}  // namespace functor
}  // namespace spconv
#endif
mmdet3d/ops/spconv/include/spconv/maxpool.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_MAXPOOL_FUNCTOR_H_
#define SPARSE_MAXPOOL_FUNCTOR_H_
#include <tensorview/tensorview.h>
namespace spconv {
namespace functor {
template <typename Device, typename T, typename Index>
struct SparseMaxPoolForwardFunctor {
  void operator()(const Device &d, tv::TensorView<T> outFeatures,
                  tv::TensorView<const T> inFeatures, tv::TensorView<const Index> indices,
                  int size);
};

template <typename Device, typename T, typename Index>
struct SparseMaxPoolBackwardFunctor {
  void operator()(const Device &d, tv::TensorView<const T> outFeatures,
                  tv::TensorView<const T> inFeatures, tv::TensorView<const T> dout,
                  tv::TensorView<T> din, tv::TensorView<const Index> indices, int size);
};
}  // namespace functor
}  // namespace spconv
#endif
mmdet3d/ops/spconv/include/spconv/mp_helper.h
0 → 100644
View file @
d1aac35d
#ifndef MP_HELPER_H_
#define MP_HELPER_H_
#include <type_traits>
#include <utility>
namespace spconv {
template <class... T>
struct mp_list {};

template <class T, T... I>
using mp_list_c = mp_list<std::integral_constant<T, I>...>;

namespace detail {
template <class... T, class F>
constexpr F mp_for_each_impl(mp_list<T...>, F &&f) {
  return std::initializer_list<int>{(f(T()), 0)...}, std::forward<F>(f);
}

template <class F>
constexpr F mp_for_each_impl(mp_list<>, F &&f) {
  return std::forward<F>(f);
}
}  // namespace detail

namespace detail {
template <class A, template <class...> class B>
struct mp_rename_impl {
  // An error "no type named 'type'" here means that the first argument to
  // mp_rename is not a list
};

template <template <class...> class A, class... T, template <class...> class B>
struct mp_rename_impl<A<T...>, B> {
  using type = B<T...>;
};
}  // namespace detail

template <class A, template <class...> class B>
using mp_rename = typename detail::mp_rename_impl<A, B>::type;

template <class L, class F>
constexpr F mp_for_each(F &&f) {
  return detail::mp_for_each_impl(mp_rename<L, mp_list>(), std::forward<F>(f));
}
}  // namespace spconv
#endif
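A small usage sketch of mp_for_each (illustrative only; needs C++14 for the generic lambda): it calls the functor once per type in a compile-time list, which is how code like this typically instantiates one template body for several index types.

#include <iostream>
#include <typeinfo>
#include "spconv/mp_helper.h"

int main() {
  using list = spconv::mp_list<int, long, float>;
  spconv::mp_for_each<list>([](auto v) {
    // v is a default-constructed value of each element type in turn
    std::cout << typeid(v).name() << std::endl;
  });
  return 0;
}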
mmdet3d/ops/spconv/include/spconv/nms.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NMS_CPU_H
#define NMS_CPU_H
#include <pybind11/pybind11.h>
// must include pybind11/stl.h if using containers in STL in arguments.
#include <algorithm>
#include <boost/geometry.hpp>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <vector>
#include "box_iou.h"
#include "nms_gpu.h"
namespace spconv {
namespace py = pybind11;
using namespace pybind11::literals;

template <typename DType>
std::vector<int> non_max_suppression_cpu(py::array_t<DType> boxes, py::array_t<int> order,
                                         DType thresh, DType eps = 0) {
  auto ndets = boxes.shape(0);
  auto boxes_r = boxes.template unchecked<2>();
  auto order_r = order.template unchecked<1>();
  auto suppressed = zeros<int>({int(ndets)});
  auto suppressed_rw = suppressed.template mutable_unchecked<1>();
  auto area = zeros<DType>({int(ndets)});
  auto area_rw = area.template mutable_unchecked<1>();
  // get areas
  for (int i = 0; i < ndets; ++i) {
    area_rw(i) =
        (boxes_r(i, 2) - boxes_r(i, 0) + eps) * (boxes_r(i, 3) - boxes_r(i, 1) + eps);
  }
  std::vector<int> keep;
  int i, j;
  DType xx1, xx2, w, h, inter, ovr;
  for (int _i = 0; _i < ndets; ++_i) {
    i = order_r(_i);
    if (suppressed_rw(i) == 1) continue;
    keep.push_back(i);
    for (int _j = _i + 1; _j < ndets; ++_j) {
      j = order_r(_j);
      if (suppressed_rw(j) == 1) continue;
      xx2 = std::min(boxes_r(i, 2), boxes_r(j, 2));
      xx1 = std::max(boxes_r(i, 0), boxes_r(j, 0));
      w = xx2 - xx1 + eps;
      if (w > 0) {
        xx2 = std::min(boxes_r(i, 3), boxes_r(j, 3));
        xx1 = std::max(boxes_r(i, 1), boxes_r(j, 1));
        h = xx2 - xx1 + eps;
        if (h > 0) {
          inter = w * h;
          ovr = inter / (area_rw(i) + area_rw(j) - inter);
          if (ovr >= thresh) suppressed_rw(j) = 1;
        }
      }
    }
  }
  return keep;
}

template <typename DType>
std::vector<int> rotate_non_max_suppression_cpu(py::array_t<DType> box_corners,
                                                py::array_t<int> order,
                                                py::array_t<DType> standup_iou,
                                                DType thresh) {
  auto ndets = box_corners.shape(0);
  auto box_corners_r = box_corners.template unchecked<3>();
  auto order_r = order.template unchecked<1>();
  auto suppressed = zeros<int>({int(ndets)});
  auto suppressed_rw = suppressed.template mutable_unchecked<1>();
  auto standup_iou_r = standup_iou.template unchecked<2>();
  std::vector<int> keep;
  int i, j;
  namespace bg = boost::geometry;
  typedef bg::model::point<DType, 2, bg::cs::cartesian> point_t;
  typedef bg::model::polygon<point_t> polygon_t;
  polygon_t poly, qpoly;
  std::vector<polygon_t> poly_inter, poly_union;
  DType inter_area, union_area, overlap;
  for (int _i = 0; _i < ndets; ++_i) {
    i = order_r(_i);
    if (suppressed_rw(i) == 1) continue;
    keep.push_back(i);
    for (int _j = _i + 1; _j < ndets; ++_j) {
      j = order_r(_j);
      if (suppressed_rw(j) == 1) continue;
      if (standup_iou_r(i, j) <= 0.0) continue;
      // std::cout << "pre_poly" << std::endl;
      try {
        bg::append(poly, point_t(box_corners_r(i, 0, 0), box_corners_r(i, 0, 1)));
        bg::append(poly, point_t(box_corners_r(i, 1, 0), box_corners_r(i, 1, 1)));
        bg::append(poly, point_t(box_corners_r(i, 2, 0), box_corners_r(i, 2, 1)));
        bg::append(poly, point_t(box_corners_r(i, 3, 0), box_corners_r(i, 3, 1)));
        bg::append(poly, point_t(box_corners_r(i, 0, 0), box_corners_r(i, 0, 1)));
        bg::append(qpoly, point_t(box_corners_r(j, 0, 0), box_corners_r(j, 0, 1)));
        bg::append(qpoly, point_t(box_corners_r(j, 1, 0), box_corners_r(j, 1, 1)));
        bg::append(qpoly, point_t(box_corners_r(j, 2, 0), box_corners_r(j, 2, 1)));
        bg::append(qpoly, point_t(box_corners_r(j, 3, 0), box_corners_r(j, 3, 1)));
        bg::append(qpoly, point_t(box_corners_r(j, 0, 0), box_corners_r(j, 0, 1)));
        bg::intersection(poly, qpoly, poly_inter);
      } catch (const std::exception &e) {
        std::cout << "box i corners:" << std::endl;
        for (int k = 0; k < 4; ++k) {
          std::cout << box_corners_r(i, k, 0) << " " << box_corners_r(i, k, 1)
                    << std::endl;
        }
        std::cout << "box j corners:" << std::endl;
        for (int k = 0; k < 4; ++k) {
          std::cout << box_corners_r(j, k, 0) << " " << box_corners_r(j, k, 1)
                    << std::endl;
        }
        // throw e;
        continue;
      }
      // std::cout << "post_poly" << std::endl;
      // std::cout << "post_intsec" << std::endl;
      if (!poly_inter.empty()) {
        inter_area = bg::area(poly_inter.front());
        // std::cout << "pre_union" << " " << inter_area << std::endl;
        bg::union_(poly, qpoly, poly_union);
        /*
        if (poly_union.empty()){
          std::cout << "intsec area:" << " " << inter_area << std::endl;
          std::cout << "box i corners:" << std::endl;
          for(int k = 0; k < 4; ++k){
            std::cout << box_corners_r(i, k, 0) << " " << box_corners_r(i, k, 1) << std::endl;
          }
          std::cout << "box j corners:" << std::endl;
          for(int k = 0; k < 4; ++k){
            std::cout << box_corners_r(j, k, 0) << " " << box_corners_r(j, k, 1) << std::endl;
          }
        }*/
        // std::cout << "post_union" << poly_union.empty() << std::endl;
        if (!poly_union.empty()) {
          // ignore invalid box
          union_area = bg::area(poly_union.front());
          // std::cout << "post union area" << std::endl;
          // std::cout << union_area << "debug" << std::endl;
          overlap = inter_area / union_area;
          if (overlap >= thresh) suppressed_rw(j) = 1;
          poly_union.clear();
        }
      }
      poly.clear();
      qpoly.clear();
      poly_inter.clear();
    }
  }
  return keep;
}

constexpr int const threadsPerBlock = sizeof(unsigned long long) * 8;

template <typename DType>
int non_max_suppression(py::array_t<DType> boxes, py::array_t<int> keep_out,
                        DType nms_overlap_thresh, int device_id) {
  py::buffer_info info = boxes.request();
  auto boxes_ptr = static_cast<DType *>(info.ptr);
  py::buffer_info info_k = keep_out.request();
  auto keep_out_ptr = static_cast<int *>(info_k.ptr);
  return _nms_gpu<DType, threadsPerBlock>(keep_out_ptr, boxes_ptr, boxes.shape(0),
                                          boxes.shape(1), nms_overlap_thresh, device_id);
}
}  // namespace spconv
#endif
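non_max_suppression_cpu scores overlap as plain intersection-over-union on [x1, y1, x2, y2] boxes, with eps added to every side length (eps = 1 reproduces inclusive pixel coordinates; eps = 0 is ordinary geometry). A worked check of the same arithmetic (illustrative only):

#include <algorithm>
#include <cstdio>

int main() {
  float a[4] = {0, 0, 2, 2}, b[4] = {1, 1, 3, 3}, eps = 0.f;
  float areaA = (a[2] - a[0] + eps) * (a[3] - a[1] + eps);      // 4
  float areaB = (b[2] - b[0] + eps) * (b[3] - b[1] + eps);      // 4
  float w = std::min(a[2], b[2]) - std::max(a[0], b[0]) + eps;  // 1
  float h = std::min(a[3], b[3]) - std::max(a[1], b[1]) + eps;  // 1
  float inter = (w > 0 && h > 0) ? w * h : 0.f;
  std::printf("IoU = %f\n", inter / (areaA + areaB - inter));   // 1/7 ~= 0.142857
  return 0;
}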
mmdet3d/ops/spconv/include/spconv/nms_functor.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NMS_FUNCTOR_H_
#define NMS_FUNCTOR_H_
#include <tensorview/tensorview.h>
namespace spconv {
namespace functor {
template <typename Device, typename T, typename Index>
struct NonMaxSupressionFunctor {
  Index operator()(const Device &d, tv::TensorView<Index> keep,
                   tv::TensorView<const T> boxes, T threshold, T eps);
};

template <typename Device, typename T, typename Index>
struct rotateNonMaxSupressionFunctor {
  Index operator()(const Device &d, tv::TensorView<Index> keep,
                   tv::TensorView<const T> boxCorners, tv::TensorView<const T> standupIoU,
                   T threshold);
};
}  // namespace functor
}  // namespace spconv
#endif
mmdet3d/ops/spconv/include/spconv/nms_gpu.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
template <typename DType, int BLOCK_THREADS>
int _nms_gpu(int *keep_out, const DType *boxes_host, int boxes_num, int boxes_dim,
             DType nms_overlap_thresh, int device_id);
mmdet3d/ops/spconv/include/spconv/nms_ops.h
0 → 100644
View file @
d1aac35d
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NMS_TORCH_OP_H_
#define NMS_TORCH_OP_H_
#include <cuda_runtime_api.h>
#include <spconv/indice.h>
#include <spconv/reordering.h>
#include <torch/script.h>
#include <torch_utils.h>
#include <utility/timer.h>
#include <spconv/nms_functor.h>
namespace spconv {
// torch.jit's doc says only support int64, so we need to convert to int32.
template <typename T>
torch::Tensor nonMaxSuppression(torch::Tensor boxes, torch::Tensor scores,
                                int64_t preMaxSize, int64_t postMaxSize, double thresh,
                                double eps) {
  // auto timer = spconv::CudaContextTimer<>();
  tv::check_torch_dtype<T>(boxes);
  auto resOptions = torch::TensorOptions().dtype(torch::kInt64).device(boxes.device());
  if (boxes.size(0) == 0) {
    return torch::zeros({0}, resOptions);
  }
  torch::Tensor indices;
  if (preMaxSize > 0) {
    auto numKeepedScores = scores.size(0);
    preMaxSize = std::min(numKeepedScores, preMaxSize);
    auto res = torch::topk(scores, preMaxSize);
    indices = std::get<1>(res);
    boxes = torch::index_select(boxes, 0, indices);
  } else {
    indices = std::get<1>(torch::sort(scores));
    boxes = torch::index_select(boxes, 0, indices);
  }
  if (boxes.size(0) == 0) return torch::zeros({0}, resOptions);
  auto keep = torch::zeros({boxes.size(0)}, resOptions);
  int64_t keepNum = 0;
  if (boxes.device().type() == torch::kCPU) {
    auto nmsFunctor = functor::NonMaxSupressionFunctor<tv::CPU, T, int64_t>();
    keepNum = nmsFunctor(tv::CPU(), tv::torch2tv<int64_t>(keep),
                         tv::torch2tv<const T>(boxes), T(thresh), T(eps));
  } else {
    TV_ASSERT_RT_ERR(false, "not implemented");
  }
  if (postMaxSize <= 0) {
    postMaxSize = keepNum;
  }
  // std::cout << keep << std::endl;
  keep = keep.slice(0, 0, std::min(keepNum, postMaxSize));
  if (preMaxSize > 0) {
    return torch::index_select(indices, 0, keep);
  }
  return keep;
}
}  // namespace spconv
#endif
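A call sketch for nonMaxSuppression (illustrative only; as the body above shows, only the CPU path is implemented, so boxes and scores must live on the CPU; the returned tensor holds int64 indices into the original boxes):

#include <torch/script.h>

torch::Tensor run_nms_example() {
  auto boxes = torch::rand({100, 4});  // [x1, y1, x2, y2], CPU float tensor
  auto scores = torch::rand({100});
  // keep at most 50 boxes before NMS and 20 after, IoU threshold 0.5, eps 0
  return spconv::nonMaxSuppression<float>(boxes, scores, 50, 20, 0.5, 0.0);
}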