Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
643b46d3
Commit
643b46d3
authored
May 09, 2020
by
zww
Browse files
Replace #include <torch/extension.h> by #include <torch/types.h> to avoid cuda bug
parent
397a9280
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
319 additions
and
275 deletions
+319
-275
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
+1
-1
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
+318
-274
No files found.
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
View file @
643b46d3
...
@@ -7,8 +7,8 @@
...
@@ -7,8 +7,8 @@
#include <assert.h>
#include <assert.h>
#include <math.h>
#include <math.h>
#include <stdio.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
...
...
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
View file @
643b46d3
//Modified from
//
Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//
https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//RoI-aware point cloud feature pooling
//
RoI-aware point cloud feature pooling
//Written by Shaoshuai Shi
//
Written by Shaoshuai Shi
//All Rights Reserved 2019.
//
All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#include <assert.h>
#include <math.h>
#include <math.h>
#include <stdio.h>
#include <stdio.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define DIVUP(m,
n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
// #define DEBUG
__device__
inline
void
lidar_to_local_coords
(
float
shift_x
,
float
shift_y
,
__device__
inline
void
lidar_to_local_coords
(
float
shift_x
,
float
shift_y
,
float
rz
,
float
&
local_x
,
float
&
local_y
){
float
rz
,
float
&
local_x
,
// should rotate pi/2 + alpha to translate LiDAR to local
float
&
local_y
)
{
float
rot_angle
=
rz
+
M_PI
/
2
;
// should rotate pi/2 + alpha to translate LiDAR to local
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
float
rot_angle
=
rz
+
M_PI
/
2
;
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
}
}
__device__
inline
int
check_pt_in_box3d
(
const
float
*
pt
,
const
float
*
box3d
,
__device__
inline
int
check_pt_in_box3d
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
){
float
&
local_x
,
float
&
local_y
)
{
// param pt: (x, y, z)
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
// bottom center
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
lidar_to_local_coords
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
lidar_to_local_coords
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
return
in_flag
;
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
return
in_flag
;
}
}
__global__
void
generate_pts_mask_for_box3d
(
int
boxes_num
,
int
pts_num
,
__global__
void
generate_pts_mask_for_box3d
(
int
boxes_num
,
int
pts_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
int
*
pts_mask
){
const
float
*
rois
,
const
float
*
pts
,
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
int
*
pts_mask
)
{
// params pts: (npoints, 3) [x, y, z]
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts_mask: (N, npoints): -1 means point doesnot in this box, otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
// params pts: (npoints, 3) [x, y, z]
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
// params pts_mask: (N, npoints): -1 means point doesnot in this box,
int
box_idx
=
blockIdx
.
y
;
// otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
if
(
pt_idx
>=
pts_num
||
box_idx
>=
boxes_num
)
return
;
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
y
;
pts
+=
pt_idx
*
3
;
if
(
pt_idx
>=
pts_num
||
box_idx
>=
boxes_num
)
return
;
rois
+=
box_idx
*
7
;
pts_mask
+=
box_idx
*
pts_num
+
pt_idx
;
pts
+=
pt_idx
*
3
;
rois
+=
box_idx
*
7
;
float
local_x
=
0
,
local_y
=
0
;
pts_mask
+=
box_idx
*
pts_num
+
pt_idx
;
int
cur_in_flag
=
check_pt_in_box3d
(
pts
,
rois
,
local_x
,
local_y
);
float
local_x
=
0
,
local_y
=
0
;
pts_mask
[
0
]
=
-
1
;
int
cur_in_flag
=
check_pt_in_box3d
(
pts
,
rois
,
local_x
,
local_y
);
if
(
cur_in_flag
>
0
){
float
local_z
=
pts
[
2
]
-
rois
[
2
];
pts_mask
[
0
]
=
-
1
;
float
w
=
rois
[
3
],
l
=
rois
[
4
],
h
=
rois
[
5
];
if
(
cur_in_flag
>
0
)
{
float
local_z
=
pts
[
2
]
-
rois
[
2
];
float
x_res
=
l
/
out_x
;
float
w
=
rois
[
3
],
l
=
rois
[
4
],
h
=
rois
[
5
];
float
y_res
=
w
/
out_y
;
float
z_res
=
h
/
out_z
;
float
x_res
=
l
/
out_x
;
float
y_res
=
w
/
out_y
;
unsigned
int
x_idx
=
int
((
local_x
+
l
/
2
)
/
x_res
);
float
z_res
=
h
/
out_z
;
unsigned
int
y_idx
=
int
((
local_y
+
w
/
2
)
/
y_res
);
unsigned
int
z_idx
=
int
(
local_z
/
z_res
);
unsigned
int
x_idx
=
int
((
local_x
+
l
/
2
)
/
x_res
);
unsigned
int
y_idx
=
int
((
local_y
+
w
/
2
)
/
y_res
);
x_idx
=
min
(
max
(
x_idx
,
0
),
out_x
-
1
);
unsigned
int
z_idx
=
int
(
local_z
/
z_res
);
y_idx
=
min
(
max
(
y_idx
,
0
),
out_y
-
1
);
z_idx
=
min
(
max
(
z_idx
,
0
),
out_z
-
1
);
x_idx
=
min
(
max
(
x_idx
,
0
),
out_x
-
1
);
y_idx
=
min
(
max
(
y_idx
,
0
),
out_y
-
1
);
unsigned
int
idx_encoding
=
(
x_idx
<<
16
)
+
(
y_idx
<<
8
)
+
z_idx
;
z_idx
=
min
(
max
(
z_idx
,
0
),
out_z
-
1
);
unsigned
int
idx_encoding
=
(
x_idx
<<
16
)
+
(
y_idx
<<
8
)
+
z_idx
;
#ifdef DEBUG
#ifdef DEBUG
printf
(
"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, %d), res(%.3f, %.3f, %.3f), idx_encoding=%x
\n
"
,
printf
(
pt_idx
,
pts
[
0
],
pts
[
1
],
pts
[
2
],
local_x
,
local_y
,
local_z
,
x_idx
,
y_idx
,
z_idx
,
x_res
,
y_res
,
z_res
,
idx_encoding
);
"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, "
"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x
\n
"
,
pt_idx
,
pts
[
0
],
pts
[
1
],
pts
[
2
],
local_x
,
local_y
,
local_z
,
x_idx
,
y_idx
,
z_idx
,
x_res
,
y_res
,
z_res
,
idx_encoding
);
#endif
#endif
pts_mask
[
0
]
=
idx_encoding
;
pts_mask
[
0
]
=
idx_encoding
;
}
}
}
}
__global__
void
collect_inside_pts_for_box3d
(
int
boxes_num
,
int
pts_num
,
__global__
void
collect_inside_pts_for_box3d
(
int
boxes_num
,
int
pts_num
,
int
max_pts_each_voxel
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_x
,
int
out_y
,
int
out_z
,
const
int
*
pts_mask
,
int
*
pts_idx_of_voxels
){
int
out_y
,
int
out_z
,
// params pts_mask: (N, npoints) 0 or 1
const
int
*
pts_mask
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int
*
pts_idx_of_voxels
)
{
// params pts_mask: (N, npoints) 0 or 1
int
box_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
if
(
box_idx
>=
boxes_num
)
return
;
int
box_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
max_num_pts
=
max_pts_each_voxel
-
1
;
// index 0 is the counter
if
(
box_idx
>=
boxes_num
)
return
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
;
int
max_num_pts
=
max_pts_each_voxel
-
1
;
// index 0 is the counter
for
(
int
k
=
0
;
k
<
pts_num
;
k
++
){
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
;
if
(
pts_mask
[
box_idx
*
pts_num
+
k
]
!=
-
1
){
unsigned
int
idx_encoding
=
pts_mask
[
box_idx
*
pts_num
+
k
];
for
(
int
k
=
0
;
k
<
pts_num
;
k
++
)
{
unsigned
int
x_idx
=
(
idx_encoding
>>
16
)
&
0xFF
;
if
(
pts_mask
[
box_idx
*
pts_num
+
k
]
!=
-
1
)
{
unsigned
int
y_idx
=
(
idx_encoding
>>
8
)
&
0xFF
;
unsigned
int
idx_encoding
=
pts_mask
[
box_idx
*
pts_num
+
k
];
unsigned
int
z_idx
=
idx_encoding
&
0xFF
;
unsigned
int
x_idx
=
(
idx_encoding
>>
16
)
&
0xFF
;
unsigned
int
base_offset
=
x_idx
*
out_y
*
out_z
*
max_pts_each_voxel
+
y_idx
*
out_z
*
max_pts_each_voxel
+
z_idx
*
max_pts_each_voxel
;
unsigned
int
y_idx
=
(
idx_encoding
>>
8
)
&
0xFF
;
unsigned
int
cnt
=
pts_idx_of_voxels
[
base_offset
];
unsigned
int
z_idx
=
idx_encoding
&
0xFF
;
if
(
cnt
<
max_num_pts
){
unsigned
int
base_offset
=
x_idx
*
out_y
*
out_z
*
max_pts_each_voxel
+
pts_idx_of_voxels
[
base_offset
+
cnt
+
1
]
=
k
;
y_idx
*
out_z
*
max_pts_each_voxel
+
pts_idx_of_voxels
[
base_offset
]
++
;
z_idx
*
max_pts_each_voxel
;
}
unsigned
int
cnt
=
pts_idx_of_voxels
[
base_offset
];
if
(
cnt
<
max_num_pts
)
{
pts_idx_of_voxels
[
base_offset
+
cnt
+
1
]
=
k
;
pts_idx_of_voxels
[
base_offset
]
++
;
}
#ifdef DEBUG
#ifdef DEBUG
printf
(
"collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x
\n
"
,
printf
(
"collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x
\n
"
,
k
,
x_idx
,
k
,
x_idx
,
y_idx
,
z_idx
,
idx_encoding
);
y_idx
,
z_idx
,
idx_encoding
);
#endif
#endif
}
}
}
}
}
}
__global__
void
roiaware_maxpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
__global__
void
roiaware_maxpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_y
,
int
out_z
,
const
float
*
pts_feature
,
const
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
*
argmax
){
int
out_z
,
const
float
*
pts_feature
,
// params pts_feature: (npoints, C)
const
int
*
pts_idx_of_voxels
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
float
*
pooled_features
,
int
*
argmax
)
{
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
// params argmax: (N, out_x, out_y, out_z, C)
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
#ifdef DEBUG
#ifdef DEBUG
printf
(
"src pts_idx_of_voxels: (%p, ), argmax: %p
\n
"
,
pts_idx_of_voxels
,
argmax
);
printf
(
"src pts_idx_of_voxels: (%p, ), argmax: %p
\n
"
,
pts_idx_of_voxels
,
argmax
);
#endif
#endif
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
offset_base
*
max_pts_each_voxel
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
int
argmax_idx
=
-
1
;
int
argmax_idx
=
-
1
;
float
max_val
=
-
1e50
;
float
max_val
=
-
1e50
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
int
total_pts
=
pts_idx_of_voxels
[
0
];
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
)
{
if
(
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
]
>
max_val
){
if
(
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
]
>
max_val
)
{
max_val
=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
max_val
=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
argmax_idx
=
pts_idx_of_voxels
[
k
];
argmax_idx
=
pts_idx_of_voxels
[
k
];
}
}
}
}
if
(
argmax_idx
!=
-
1
){
if
(
argmax_idx
!=
-
1
)
{
pooled_features
[
0
]
=
max_val
;
pooled_features
[
0
]
=
max_val
;
}
}
argmax
[
0
]
=
argmax_idx
;
argmax
[
0
]
=
argmax_idx
;
#ifdef DEBUG
#ifdef DEBUG
printf
(
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)
\n
"
,
printf
(
channel_idx
,
x_idx
,
y_idx
,
z_idx
,
argmax_idx
,
max_val
,
total_pts
,
pts_idx_of_voxels
,
argmax
,
argmax_idx
);
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after "
"pts_idx: %p, argmax: (%p, %d)
\n
"
,
channel_idx
,
x_idx
,
y_idx
,
z_idx
,
argmax_idx
,
max_val
,
total_pts
,
pts_idx_of_voxels
,
argmax
,
argmax_idx
);
#endif
#endif
}
}
__global__
void
roiaware_avgpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
__global__
void
roiaware_avgpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_y
,
int
out_z
,
const
float
*
pts_feature
,
const
int
*
pts_idx_of_voxels
,
float
*
pooled_features
){
int
out_z
,
const
float
*
pts_feature
,
// params pts_feature: (npoints, C)
const
int
*
pts_idx_of_voxels
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
float
*
pooled_features
)
{
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
// params argmax: (N, out_x, out_y, out_z, C)
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
y_idx
>=
out_y
||
z_idx
>=
out_z
)
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
return
;
float
sum_val
=
0
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
sum_val
+=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
offset_base
*
channels
+
channel_idx
;
}
float
sum_val
=
0
;
if
(
total_pts
>
0
){
int
total_pts
=
pts_idx_of_voxels
[
0
];
pooled_features
[
0
]
=
sum_val
/
total_pts
;
}
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
)
{
sum_val
+=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
}
if
(
total_pts
>
0
)
{
pooled_features
[
0
]
=
sum_val
/
total_pts
;
}
}
}
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
pts_feature
,
int
*
argmax
,
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
pool_method
){
const
float
*
pts_feature
,
int
*
argmax
,
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
int
pool_method
)
{
// params pts_feature: (npoints, C)
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params argmax: (N, out_x, out_y, out_z, C)
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pts_feature: (npoints, C)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
int
*
pts_mask
=
NULL
;
// params pool_method: 0: max_pool 1: avg_pool
cudaMalloc
(
&
pts_mask
,
boxes_num
*
pts_num
*
sizeof
(
int
));
// (N, M)
cudaMemset
(
pts_mask
,
-
1
,
boxes_num
*
pts_num
*
sizeof
(
int
));
int
*
pts_mask
=
NULL
;
cudaMalloc
(
&
pts_mask
,
boxes_num
*
pts_num
*
sizeof
(
int
));
// (N, M)
dim3
blocks_mask
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
boxes_num
);
cudaMemset
(
pts_mask
,
-
1
,
boxes_num
*
pts_num
*
sizeof
(
int
));
dim3
threads
(
THREADS_PER_BLOCK
);
generate_pts_mask_for_box3d
<<<
blocks_mask
,
threads
>>>
(
boxes_num
,
pts_num
,
out_x
,
out_y
,
out_z
,
rois
,
pts
,
pts_mask
);
dim3
blocks_mask
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
boxes_num
);
dim3
threads
(
THREADS_PER_BLOCK
);
// TODO: Merge the collect and pool functions, SS
generate_pts_mask_for_box3d
<<<
blocks_mask
,
threads
>>>
(
boxes_num
,
pts_num
,
out_x
,
out_y
,
out_z
,
rois
,
pts
,
pts_mask
);
dim3
blocks_collect
(
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK
));
collect_inside_pts_for_box3d
<<<
blocks_collect
,
threads
>>>
(
boxes_num
,
pts_num
,
max_pts_each_voxel
,
// TODO: Merge the collect and pool functions, SS
out_x
,
out_y
,
out_z
,
pts_mask
,
pts_idx_of_voxels
);
dim3
blocks_collect
(
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK
));
dim3
blocks_pool
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
boxes_num
);
collect_inside_pts_for_box3d
<<<
blocks_collect
,
threads
>>>
(
if
(
pool_method
==
0
){
boxes_num
,
pts_num
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_mask
,
roiaware_maxpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_idx_of_voxels
);
pts_feature
,
pts_idx_of_voxels
,
pooled_features
,
argmax
);
}
dim3
blocks_pool
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
else
if
(
pool_method
==
1
){
boxes_num
);
roiaware_avgpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
if
(
pool_method
==
0
)
{
pts_feature
,
pts_idx_of_voxels
,
pooled_features
);
roiaware_maxpool3d
<<<
blocks_pool
,
threads
>>>
(
}
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_feature
,
pts_idx_of_voxels
,
pooled_features
,
argmax
);
}
else
if
(
pool_method
==
1
)
{
cudaFree
(
pts_mask
);
roiaware_avgpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_feature
,
pts_idx_of_voxels
,
pooled_features
);
}
cudaFree
(
pts_mask
);
#ifdef DEBUG
#ifdef DEBUG
cudaDeviceSynchronize
();
// for using printf in kernel function
cudaDeviceSynchronize
();
// for using printf in kernel function
#endif
#endif
}
}
__global__
void
roiaware_maxpool3d_backward
(
int
boxes_num
,
int
channels
,
__global__
void
roiaware_maxpool3d_backward
(
int
boxes_num
,
int
channels
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_x
,
int
out_y
,
int
out_z
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
){
const
int
*
argmax
,
// params argmax: (N, out_x, out_y, out_z, C)
const
float
*
grad_out
,
// params grad_out: (N, out_x, out_y, out_z, C)
float
*
grad_in
)
{
// params grad_in: (npoints, C), return value
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
// params grad_in: (npoints, C), return value
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
y_idx
>=
out_y
||
z_idx
>=
out_z
)
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
return
;
if
(
argmax
[
0
]
==
-
1
)
return
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
atomicAdd
(
grad_in
+
argmax
[
0
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
1
);
offset_base
*
channels
+
channel_idx
;
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
if
(
argmax
[
0
]
==
-
1
)
return
;
atomicAdd
(
grad_in
+
argmax
[
0
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
1
);
}
}
__global__
void
roiaware_avgpool3d_backward
(
int
boxes_num
,
int
channels
,
__global__
void
roiaware_avgpool3d_backward
(
int
boxes_num
,
int
channels
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_x
,
int
out_y
,
int
out_z
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
float
*
grad_out
,
float
*
grad_in
){
int
max_pts_each_voxel
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
const
int
*
pts_idx_of_voxels
,
// params grad_out: (N, out_x, out_y, out_z, C)
const
float
*
grad_out
,
// params grad_in: (npoints, C), return value
float
*
grad_in
)
{
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int
box_idx
=
blockIdx
.
z
;
// params grad_out: (N, out_x, out_y, out_z, C)
int
channel_idx
=
blockIdx
.
y
;
// params grad_in: (npoints, C), return value
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
channel_idx
=
blockIdx
.
y
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
float
cur_grad
=
1
/
fmaxf
(
float
(
total_pts
),
1.0
);
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
offset_base
*
max_pts_each_voxel
;
atomicAdd
(
grad_in
+
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
cur_grad
);
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
}
offset_base
*
channels
+
channel_idx
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
float
cur_grad
=
1
/
fmaxf
(
float
(
total_pts
),
1.0
);
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
)
{
atomicAdd
(
grad_in
+
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
cur_grad
);
}
}
}
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
int
max_pts_each_voxel
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
,
int
pool_method
){
const
int
*
pts_idx_of_voxels
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
const
int
*
argmax
,
const
float
*
grad_out
,
// params argmax: (N, out_x, out_y, out_z, C)
float
*
grad_in
,
int
pool_method
)
{
// params
grad_out
: (N, out_x, out_y, out_z,
C
)
// params
pts_idx_of_voxels
: (N, out_x, out_y, out_z,
max_pts_each_voxel
)
// params
grad_in: (npoints, C), return value
// params
argmax: (N, out_x, out_y, out_z, C)
// params
pool_method: 0: max_pool, 1: avg_pool
// params
grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
dim3
blocks
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
boxes_num
);
// params pool_method: 0: max_pool, 1: avg_pool
dim3
threads
(
THREADS_PER_BLOCK
);
if
(
pool_method
==
0
){
dim3
blocks
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
roiaware_maxpool3d_backward
<<<
blocks
,
threads
>>>
(
boxes_num
);
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
argmax
,
grad_out
,
grad_in
dim3
threads
(
THREADS_PER_BLOCK
);
);
if
(
pool_method
==
0
)
{
}
roiaware_maxpool3d_backward
<<<
blocks
,
threads
>>>
(
else
if
(
pool_method
==
1
){
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
argmax
,
grad_out
,
grad_in
);
roiaware_avgpool3d_backward
<<<
blocks
,
threads
>>>
(
}
else
if
(
pool_method
==
1
)
{
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
max_pts_each_voxel
,
pts_idx_of_voxels
,
grad_out
,
grad_in
roiaware_avgpool3d_backward
<<<
blocks
,
threads
>>>
(
);
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
max_pts_each_voxel
,
}
pts_idx_of_voxels
,
grad_out
,
grad_in
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment