Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
bce7d0c3
Commit
bce7d0c3
authored
May 12, 2020
by
yinchimaoliang
Browse files
Merge branch 'master_temp' into indoor_pipeline
parents
1756485e
868c5fab
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
674 additions
and
317 deletions
+674
-317
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
...igs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+2
-1
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
.../kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+2
-1
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+2
-1
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
...igs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+2
-1
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
+2
-1
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+22
-6
mmdet3d/core/bbox/samplers/__init__.py
mmdet3d/core/bbox/samplers/__init__.py
+2
-1
mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py
mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py
+154
-0
mmdet3d/datasets/kitti_dataset.py
mmdet3d/datasets/kitti_dataset.py
+2
-2
mmdet3d/datasets/pipelines/data_augment_utils.py
mmdet3d/datasets/pipelines/data_augment_utils.py
+5
-0
mmdet3d/ops/iou3d/__init__.py
mmdet3d/ops/iou3d/__init__.py
+6
-3
mmdet3d/ops/iou3d/iou3d_utils.py
mmdet3d/ops/iou3d/iou3d_utils.py
+93
-18
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
+1
-1
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
+318
-274
mmdet3d/ops/sparse_block.py
mmdet3d/ops/sparse_block.py
+1
-1
tests/test_samplers.py
tests/test_samplers.py
+40
-0
tools/dist_test.sh
tools/dist_test.sh
+10
-0
tools/dist_train.sh
tools/dist_train.sh
+3
-3
tools/slurm_test.sh
tools/slurm_test.sh
+3
-1
tools/slurm_train.sh
tools/slurm_train.sh
+4
-2
No files found.
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
View file @
bce7d0c3
...
@@ -93,7 +93,8 @@ class_names = ['Car']
...
@@ -93,7 +93,8 @@ class_names = ['Car']
img_norm_cfg
=
dict
(
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
input_modality
=
dict
(
use_lidar
=
True
,
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_lidar_intensity
=
True
,
use_camera
=
False
,
use_camera
=
False
,
...
...
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
View file @
bce7d0c3
...
@@ -113,7 +113,8 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
...
@@ -113,7 +113,8 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg
=
dict
(
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
input_modality
=
dict
(
use_lidar
=
True
,
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_lidar_intensity
=
True
,
use_camera
=
True
,
use_camera
=
True
,
...
...
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
View file @
bce7d0c3
...
@@ -91,7 +91,8 @@ class_names = ['Car']
...
@@ -91,7 +91,8 @@ class_names = ['Car']
img_norm_cfg
=
dict
(
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
input_modality
=
dict
(
use_lidar
=
True
,
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_lidar_intensity
=
True
,
use_camera
=
True
,
use_camera
=
True
,
...
...
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
View file @
bce7d0c3
...
@@ -90,7 +90,8 @@ class_names = ['Car']
...
@@ -90,7 +90,8 @@ class_names = ['Car']
img_norm_cfg
=
dict
(
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
input_modality
=
dict
(
use_lidar
=
True
,
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_lidar_intensity
=
True
,
use_camera
=
False
,
use_camera
=
False
,
...
...
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
View file @
bce7d0c3
...
@@ -89,7 +89,8 @@ class_names = ['Car']
...
@@ -89,7 +89,8 @@ class_names = ['Car']
img_norm_cfg
=
dict
(
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
input_modality
=
dict
(
use_lidar
=
True
,
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_lidar_intensity
=
True
,
use_camera
=
False
,
use_camera
=
False
,
...
...
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
View file @
bce7d0c3
import
torch
import
torch
from
mmdet3d.ops.iou3d
import
boxes_iou3d_gpu
from
mmdet3d.ops.iou3d
import
boxes_iou3d_gpu
_camera
,
boxes_iou3d_gpu_lidar
from
mmdet.core.bbox
import
bbox_overlaps
from
mmdet.core.bbox
import
bbox_overlaps
from
mmdet.core.bbox.iou_calculators.builder
import
IOU_CALCULATORS
from
mmdet.core.bbox.iou_calculators.builder
import
IOU_CALCULATORS
from
..
import
box_torch_ops
from
..
import
box_torch_ops
...
@@ -22,10 +22,18 @@ class BboxOverlapsNearest3D(object):
...
@@ -22,10 +22,18 @@ class BboxOverlapsNearest3D(object):
@
IOU_CALCULATORS
.
register_module
()
@
IOU_CALCULATORS
.
register_module
()
class
BboxOverlaps3D
(
object
):
class
BboxOverlaps3D
(
object
):
"""3D IoU Calculator
"""
"""3D IoU Calculator
def
__call__
(
self
,
bboxes1
,
bboxes2
,
mode
=
'iou'
,
is_aligned
=
False
):
Args:
return
bbox_overlaps_3d
(
bboxes1
,
bboxes2
,
mode
,
is_aligned
)
coordinate (str): 'camera' or 'lidar' coordinate system
"""
def
__init__
(
self
,
coordinate
):
assert
coordinate
in
[
'camera'
,
'lidar'
]
self
.
coordinate
=
coordinate
def
__call__
(
self
,
bboxes1
,
bboxes2
,
mode
=
'iou'
):
return
bbox_overlaps_3d
(
bboxes1
,
bboxes2
,
mode
,
self
.
coordinate
)
def
__repr__
(
self
):
def
__repr__
(
self
):
repr_str
=
self
.
__class__
.
__name__
repr_str
=
self
.
__class__
.
__name__
...
@@ -62,7 +70,7 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
...
@@ -62,7 +70,7 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
return
ret
return
ret
def
bbox_overlaps_3d
(
bboxes1
,
bboxes2
,
mode
=
'iou'
):
def
bbox_overlaps_3d
(
bboxes1
,
bboxes2
,
mode
=
'iou'
,
coordinate
=
'camera'
):
"""Calculate 3D IoU using cuda implementation
"""Calculate 3D IoU using cuda implementation
Args:
Args:
...
@@ -70,6 +78,7 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
...
@@ -70,6 +78,7 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
mode: mode (str): "iou" (intersection over union) or
mode: mode (str): "iou" (intersection over union) or
iof (intersection over foreground).
iof (intersection over foreground).
coordinate (str): 'camera' or 'lidar' coordinate system
Return:
Return:
iou: (M, N) not support aligned mode currently
iou: (M, N) not support aligned mode currently
...
@@ -77,4 +86,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
...
@@ -77,4 +86,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
# TODO: check the input dimension meanings,
# TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d
# this is inconsistent with that in bbox_overlaps_nearest_3d
assert
bboxes1
.
size
(
-
1
)
==
bboxes2
.
size
(
-
1
)
==
7
assert
bboxes1
.
size
(
-
1
)
==
bboxes2
.
size
(
-
1
)
==
7
return
boxes_iou3d_gpu
(
bboxes1
,
bboxes2
,
mode
)
assert
coordinate
in
[
'camera'
,
'lidar'
]
if
coordinate
==
'camera'
:
return
boxes_iou3d_gpu_camera
(
bboxes1
,
bboxes2
,
mode
)
elif
coordinate
==
'lidar'
:
return
boxes_iou3d_gpu_lidar
(
bboxes1
,
bboxes2
,
mode
)
else
:
raise
NotImplementedError
mmdet3d/core/bbox/samplers/__init__.py
View file @
bce7d0c3
...
@@ -3,9 +3,10 @@ from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
...
@@ -3,9 +3,10 @@ from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
IoUBalancedNegSampler
,
OHEMSampler
,
IoUBalancedNegSampler
,
OHEMSampler
,
PseudoSampler
,
RandomSampler
,
PseudoSampler
,
RandomSampler
,
SamplingResult
)
SamplingResult
)
from
.iou_neg_piecewise_sampler
import
IoUNegPiecewiseSampler
__all__
=
[
__all__
=
[
'BaseSampler'
,
'PseudoSampler'
,
'RandomSampler'
,
'BaseSampler'
,
'PseudoSampler'
,
'RandomSampler'
,
'InstanceBalancedPosSampler'
,
'IoUBalancedNegSampler'
,
'CombinedSampler'
,
'InstanceBalancedPosSampler'
,
'IoUBalancedNegSampler'
,
'CombinedSampler'
,
'OHEMSampler'
,
'SamplingResult'
'OHEMSampler'
,
'SamplingResult'
,
'IoUNegPiecewiseSampler'
]
]
mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py
0 → 100644
View file @
bce7d0c3
import
torch
from
mmdet.core.bbox.builder
import
BBOX_SAMPLERS
from
.
import
RandomSampler
,
SamplingResult
@
BBOX_SAMPLERS
.
register_module
class
IoUNegPiecewiseSampler
(
RandomSampler
):
"""IoU Piece-wise Sampling
Sampling negtive proposals according to a list of IoU thresholds.
The negtive proposals are divided into several pieces according
to `neg_iou_piece_thrs`. And the ratio of each piece is indicated
by `neg_piece_fractions`.
Args:
num (int): number of proposals.
pos_fraction (float): the fraction of positive proposals.
neg_piece_fractions (list): a list contains fractions that indicates
the ratio of each piece of total negtive samplers.
neg_iou_piece_thrs (list): a list contains IoU thresholds that
indicate the upper bound of this piece.
neg_pos_ub (float): the total ratio to limit the upper bound
number of negtive samples
add_gt_as_proposals (bool): whether to add gt as proposals.
"""
def
__init__
(
self
,
num
,
pos_fraction
=
None
,
neg_piece_fractions
=
None
,
neg_iou_piece_thrs
=
None
,
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
,
return_iou
=
False
):
super
(
IoUNegPiecewiseSampler
,
self
).
__init__
(
num
,
pos_fraction
,
neg_pos_ub
,
add_gt_as_proposals
)
assert
isinstance
(
neg_piece_fractions
,
list
)
assert
len
(
neg_piece_fractions
)
==
len
(
neg_iou_piece_thrs
)
self
.
neg_piece_fractions
=
neg_piece_fractions
self
.
neg_iou_thr
=
neg_iou_piece_thrs
self
.
return_iou
=
return_iou
self
.
neg_piece_num
=
len
(
self
.
neg_piece_fractions
)
def
_sample_pos
(
self
,
assign_result
,
num_expected
,
**
kwargs
):
"""Randomly sample some positive samples."""
pos_inds
=
torch
.
nonzero
(
assign_result
.
gt_inds
>
0
,
as_tuple
=
False
)
if
pos_inds
.
numel
()
!=
0
:
pos_inds
=
pos_inds
.
squeeze
(
1
)
if
pos_inds
.
numel
()
<=
num_expected
:
return
pos_inds
else
:
return
self
.
random_choice
(
pos_inds
,
num_expected
)
def
_sample_neg
(
self
,
assign_result
,
num_expected
,
**
kwargs
):
neg_inds
=
torch
.
nonzero
(
assign_result
.
gt_inds
==
0
)
if
neg_inds
.
numel
()
!=
0
:
neg_inds
=
neg_inds
.
squeeze
(
1
)
if
len
(
neg_inds
)
<=
num_expected
:
return
neg_inds
else
:
neg_inds_choice
=
neg_inds
.
new_zeros
([
0
])
extend_num
=
0
max_overlaps
=
assign_result
.
max_overlaps
[
neg_inds
]
for
piece_inds
in
range
(
self
.
neg_piece_num
):
if
piece_inds
==
self
.
neg_piece_num
-
1
:
# for the last piece
piece_expected_num
=
num_expected
-
len
(
neg_inds_choice
)
min_iou_thr
=
0
else
:
# if the numbers of negative samplers in previous
# pieces are less than the expected number, extend
# the same number in the current piece.
piece_expected_num
=
int
(
num_expected
*
self
.
neg_piece_fractions
[
piece_inds
])
+
extend_num
min_iou_thr
=
self
.
neg_iou_thr
[
piece_inds
+
1
]
max_iou_thr
=
self
.
neg_iou_thr
[
piece_inds
]
piece_neg_inds
=
torch
.
nonzero
(
(
max_overlaps
>=
min_iou_thr
)
&
(
max_overlaps
<
max_iou_thr
)).
view
(
-
1
)
if
len
(
piece_neg_inds
)
<
piece_expected_num
:
neg_inds_choice
=
torch
.
cat
(
[
neg_inds_choice
,
neg_inds
[
piece_neg_inds
]],
dim
=
0
)
extend_num
+=
piece_expected_num
-
len
(
piece_neg_inds
)
else
:
piece_choice
=
self
.
random_choice
(
piece_neg_inds
,
piece_expected_num
)
neg_inds_choice
=
torch
.
cat
(
[
neg_inds_choice
,
neg_inds
[
piece_choice
]],
dim
=
0
)
extend_num
=
0
return
neg_inds_choice
def
sample
(
self
,
assign_result
,
bboxes
,
gt_bboxes
,
gt_labels
=
None
,
**
kwargs
):
"""Sample positive and negative bboxes.
This is a simple implementation of bbox sampling given candidates,
assigning results and ground truth bboxes.
Args:
assign_result (:obj:`AssignResult`): Bbox assigning results.
bboxes (Tensor): Boxes to be sampled from.
gt_bboxes (Tensor): Ground truth bboxes.
gt_labels (Tensor, optional): Class labels of ground truth bboxes.
Returns:
:obj:`SamplingResult`: Sampling result.
"""
if
len
(
bboxes
.
shape
)
<
2
:
bboxes
=
bboxes
[
None
,
:]
gt_flags
=
bboxes
.
new_zeros
((
bboxes
.
shape
[
0
],
),
dtype
=
torch
.
bool
)
if
self
.
add_gt_as_proposals
and
len
(
gt_bboxes
)
>
0
:
if
gt_labels
is
None
:
raise
ValueError
(
'gt_labels must be given when add_gt_as_proposals is True'
)
bboxes
=
torch
.
cat
([
gt_bboxes
,
bboxes
],
dim
=
0
)
assign_result
.
add_gt_
(
gt_labels
)
gt_ones
=
bboxes
.
new_ones
(
gt_bboxes
.
shape
[
0
],
dtype
=
torch
.
bool
)
gt_flags
=
torch
.
cat
([
gt_ones
,
gt_flags
])
num_expected_pos
=
int
(
self
.
num
*
self
.
pos_fraction
)
pos_inds
=
self
.
pos_sampler
.
_sample_pos
(
assign_result
,
num_expected_pos
,
bboxes
=
bboxes
,
**
kwargs
)
# We found that sampled indices have duplicated items occasionally.
# (may be a bug of PyTorch)
pos_inds
=
pos_inds
.
unique
()
num_sampled_pos
=
pos_inds
.
numel
()
num_expected_neg
=
self
.
num
-
num_sampled_pos
if
self
.
neg_pos_ub
>=
0
:
_pos
=
max
(
1
,
num_sampled_pos
)
neg_upper_bound
=
int
(
self
.
neg_pos_ub
*
_pos
)
if
num_expected_neg
>
neg_upper_bound
:
num_expected_neg
=
neg_upper_bound
neg_inds
=
self
.
neg_sampler
.
_sample_neg
(
assign_result
,
num_expected_neg
,
bboxes
=
bboxes
,
**
kwargs
)
neg_inds
=
neg_inds
.
unique
()
sampling_result
=
SamplingResult
(
pos_inds
,
neg_inds
,
bboxes
,
gt_bboxes
,
assign_result
,
gt_flags
)
if
self
.
return_iou
:
# PartA2 needs iou score to regression.
sampling_result
.
iou
=
assign_result
.
max_overlaps
[
torch
.
cat
(
[
pos_inds
,
neg_inds
])]
sampling_result
.
iou
.
detach_
()
return
sampling_result
mmdet3d/datasets/kitti_dataset.py
View file @
bce7d0c3
...
@@ -184,6 +184,8 @@ class KittiDataset(torch_data.Dataset):
...
@@ -184,6 +184,8 @@ class KittiDataset(torch_data.Dataset):
if
self
.
modality
[
'use_depth'
]
and
self
.
modality
[
'use_lidar'
]:
if
self
.
modality
[
'use_depth'
]
and
self
.
modality
[
'use_lidar'
]:
points
=
self
.
get_lidar_depth_reduced
(
sample_idx
)
points
=
self
.
get_lidar_depth_reduced
(
sample_idx
)
elif
self
.
modality
[
'use_lidar'
]:
elif
self
.
modality
[
'use_lidar'
]:
points
=
self
.
get_lidar
(
sample_idx
)
elif
self
.
modality
[
'use_lidar_reduced'
]:
points
=
self
.
get_lidar_reduced
(
sample_idx
)
points
=
self
.
get_lidar_reduced
(
sample_idx
)
elif
self
.
modality
[
'use_depth'
]:
elif
self
.
modality
[
'use_depth'
]:
points
=
self
.
get_pure_depth_reduced
(
sample_idx
)
points
=
self
.
get_pure_depth_reduced
(
sample_idx
)
...
@@ -238,8 +240,6 @@ class KittiDataset(torch_data.Dataset):
...
@@ -238,8 +240,6 @@ class KittiDataset(torch_data.Dataset):
axis
=
1
).
astype
(
np
.
float32
)
axis
=
1
).
astype
(
np
.
float32
)
difficulty
=
annos
[
'difficulty'
]
difficulty
=
annos
[
'difficulty'
]
# this change gt_bboxes_3d to velodyne coordinates
# this change gt_bboxes_3d to velodyne coordinates
import
pdb
pdb
.
set_trace
()
gt_bboxes_3d
=
box_np_ops
.
box_camera_to_lidar
(
gt_bboxes_3d
,
rect
,
gt_bboxes_3d
=
box_np_ops
.
box_camera_to_lidar
(
gt_bboxes_3d
,
rect
,
Trv2c
)
Trv2c
)
# only center format is allowed. so we need to convert
# only center format is allowed. so we need to convert
...
...
mmdet3d/datasets/pipelines/data_augment_utils.py
View file @
bce7d0c3
import
warnings
import
numba
import
numba
import
numpy
as
np
import
numpy
as
np
from
numba.errors
import
NumbaPerformanceWarning
from
mmdet3d.core.bbox
import
box_np_ops
from
mmdet3d.core.bbox
import
box_np_ops
warnings
.
filterwarnings
(
"ignore"
,
category
=
NumbaPerformanceWarning
)
@
numba
.
njit
@
numba
.
njit
def
_rotation_box2d_jit_
(
corners
,
angle
,
rot_mat_T
):
def
_rotation_box2d_jit_
(
corners
,
angle
,
rot_mat_T
):
...
...
mmdet3d/ops/iou3d/__init__.py
View file @
bce7d0c3
from
.iou3d_utils
import
(
boxes_iou3d_gpu
,
boxes_iou
_bev
,
nms_gpu
,
from
.iou3d_utils
import
(
boxes_iou3d_gpu
_camera
,
boxes_iou
3d_gpu_lidar
,
nms_normal_gpu
)
boxes_iou_bev
,
nms_gpu
,
nms_normal_gpu
)
__all__
=
[
'boxes_iou_bev'
,
'boxes_iou3d_gpu'
,
'nms_gpu'
,
'nms_normal_gpu'
]
__all__
=
[
'boxes_iou_bev'
,
'boxes_iou3d_gpu_camera'
,
'nms_gpu'
,
'nms_normal_gpu'
,
'boxes_iou3d_gpu_lidar'
]
mmdet3d/ops/iou3d/iou3d_utils.py
View file @
bce7d0c3
...
@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
...
@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
return
ans_iou
return
ans_iou
def
boxes_iou3d_gpu
(
boxes_a
,
boxes_b
,
mode
=
'iou'
):
def
boxes_iou3d_gpu_camera
(
boxes_a
,
boxes_b
,
mode
=
'iou'
):
"""
"""Calculate 3d iou of boxes in camera coordinate
:param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
:param boxes_b: (M, 7) [x, y, z, h, w, l, ry]
Args:
:param mode "iou" (intersection over union) or iof (intersection over
boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
foreground).
:return:
ans_iou: (M, N)
Returns:
FloatTensor: (M, N)
"""
"""
boxes_a_bev
=
boxes3d_to_bev_torch
(
boxes_a
)
boxes_b_bev
=
boxes3d_to_bev_torch
(
boxes_b
)
boxes_a_bev
=
boxes3d_to_bev_torch_camera
(
boxes_a
)
boxes_b_bev
=
boxes3d_to_bev_torch_camera
(
boxes_b
)
# bev overlap
# bev overlap
overlaps_bev
=
torch
.
cuda
.
FloatTensor
(
overlaps_bev
=
torch
.
cuda
.
FloatTensor
(
...
@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
...
@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
# 3d iou
# 3d iou
overlaps_3d
=
overlaps_bev
*
overlaps_h
overlaps_3d
=
overlaps_bev
*
overlaps_h
vol_a
=
(
boxes_a
[:,
3
]
*
boxes_a
[:,
4
]
*
boxes_a
[:,
5
]).
view
(
-
1
,
1
)
vol
ume
_a
=
(
boxes_a
[:,
3
]
*
boxes_a
[:,
4
]
*
boxes_a
[:,
5
]).
view
(
-
1
,
1
)
vol_b
=
(
boxes_b
[:,
3
]
*
boxes_b
[:,
4
]
*
boxes_b
[:,
5
]).
view
(
1
,
-
1
)
vol
ume
_b
=
(
boxes_b
[:,
3
]
*
boxes_b
[:,
4
]
*
boxes_b
[:,
5
]).
view
(
1
,
-
1
)
if
mode
==
'iou'
:
if
mode
==
'iou'
:
# the clamp func is used to avoid division of 0
# the clamp func is used to avoid division of 0
iou3d
=
overlaps_3d
/
torch
.
clamp
(
iou3d
=
overlaps_3d
/
torch
.
clamp
(
vol_a
+
vol_b
-
overlaps_3d
,
min
=
1e-8
)
vol
ume
_a
+
vol
ume
_b
-
overlaps_3d
,
min
=
1e-8
)
else
:
else
:
iou3d
=
overlaps_3d
/
torch
.
clamp
(
vol_a
,
min
=
1e-8
)
iou3d
=
overlaps_3d
/
torch
.
clamp
(
volume_a
,
min
=
1e-8
)
return
iou3d
def
boxes_iou3d_gpu_lidar
(
boxes_a
,
boxes_b
,
mode
=
'iou'
):
"""Calculate 3d iou of boxes in lidar coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
:Returns:
FloatTensor: (M, N)
"""
boxes_a_bev
=
boxes3d_to_bev_torch_lidar
(
boxes_a
)
boxes_b_bev
=
boxes3d_to_bev_torch_lidar
(
boxes_b
)
# height overlap
boxes_a_height_max
=
(
boxes_a
[:,
2
]
+
boxes_a
[:,
5
]).
view
(
-
1
,
1
)
boxes_a_height_min
=
boxes_a
[:,
2
].
view
(
-
1
,
1
)
boxes_b_height_max
=
(
boxes_b
[:,
2
]
+
boxes_b
[:,
5
]).
view
(
1
,
-
1
)
boxes_b_height_min
=
boxes_b
[:,
2
].
view
(
1
,
-
1
)
# bev overlap
overlaps_bev
=
boxes_a
.
new_zeros
(
torch
.
Size
((
boxes_a
.
shape
[
0
],
boxes_b
.
shape
[
0
])))
# (N, M)
iou3d_cuda
.
boxes_overlap_bev_gpu
(
boxes_a_bev
.
contiguous
(),
boxes_b_bev
.
contiguous
(),
overlaps_bev
)
max_of_min
=
torch
.
max
(
boxes_a_height_min
,
boxes_b_height_min
)
min_of_max
=
torch
.
min
(
boxes_a_height_max
,
boxes_b_height_max
)
overlaps_h
=
torch
.
clamp
(
min_of_max
-
max_of_min
,
min
=
0
)
# 3d iou
overlaps_3d
=
overlaps_bev
*
overlaps_h
volume_a
=
(
boxes_a
[:,
3
]
*
boxes_a
[:,
4
]
*
boxes_a
[:,
5
]).
view
(
-
1
,
1
)
volume_b
=
(
boxes_b
[:,
3
]
*
boxes_b
[:,
4
]
*
boxes_b
[:,
5
]).
view
(
1
,
-
1
)
if
mode
==
'iou'
:
# the clamp func is used to avoid division of 0
iou3d
=
overlaps_3d
/
torch
.
clamp
(
volume_a
+
volume_b
-
overlaps_3d
,
min
=
1e-8
)
else
:
iou3d
=
overlaps_3d
/
torch
.
clamp
(
volume_a
,
min
=
1e-8
)
return
iou3d
return
iou3d
...
@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
...
@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
return
order
[
keep
[:
num_out
].
cuda
()].
contiguous
()
return
order
[
keep
[:
num_out
].
cuda
()].
contiguous
()
def
boxes3d_to_bev_torch
(
boxes3d
):
def
boxes3d_to_bev_torch_camera
(
boxes3d
):
"""
"""covert boxes3d to bev in in camera coords
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
:return:
Args:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
boxes3d (FloartTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
Return:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
"""
boxes_bev
=
boxes3d
.
new
(
torch
.
Size
((
boxes3d
.
shape
[
0
],
5
)))
boxes_bev
=
boxes3d
.
new
(
torch
.
Size
((
boxes3d
.
shape
[
0
],
5
)))
cu
,
cv
=
boxes3d
[:,
0
],
boxes3d
[:,
2
]
cu
,
cv
=
boxes3d
[:,
0
],
boxes3d
[:,
2
]
half_l
,
half_w
=
boxes3d
[:,
5
]
/
2
,
boxes3d
[:,
4
]
/
2
half_l
,
half_w
=
boxes3d
[:,
5
]
/
2
,
boxes3d
[:,
4
]
/
2
boxes_bev
[:,
0
],
boxes_bev
[:,
1
]
=
cu
-
half_l
,
cv
-
half_w
boxes_bev
[:,
0
],
boxes_bev
[:,
1
]
=
cu
-
half_l
,
cv
-
half_w
boxes_bev
[:,
2
],
boxes_bev
[:,
3
]
=
cu
+
half_l
,
cv
+
half_w
boxes_bev
[:,
2
],
boxes_bev
[:,
3
]
=
cu
+
half_l
,
cv
+
half_w
boxes_bev
[:,
4
]
=
boxes3d
[:,
6
]
boxes_bev
[:,
4
]
=
boxes3d
[:,
6
]
return
boxes_bev
return
boxes_bev
def
boxes3d_to_bev_torch_lidar
(
boxes3d
):
"""covert boxes3d to bev in in LiDAR coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
Returns:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev
=
boxes3d
.
new
(
torch
.
Size
((
boxes3d
.
shape
[
0
],
5
)))
x
,
y
=
boxes3d
[:,
0
],
boxes3d
[:,
1
]
half_l
,
half_w
=
boxes3d
[:,
4
]
/
2
,
boxes3d
[:,
3
]
/
2
boxes_bev
[:,
0
],
boxes_bev
[:,
1
]
=
x
-
half_w
,
y
-
half_l
boxes_bev
[:,
2
],
boxes_bev
[:,
3
]
=
x
+
half_w
,
y
+
half_l
boxes_bev
[:,
4
]
=
boxes3d
[:,
6
]
return
boxes_bev
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
View file @
bce7d0c3
...
@@ -7,8 +7,8 @@
...
@@ -7,8 +7,8 @@
#include <assert.h>
#include <assert.h>
#include <math.h>
#include <math.h>
#include <stdio.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
...
...
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
View file @
bce7d0c3
//Modified from
//
Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//
https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//RoI-aware point cloud feature pooling
//
RoI-aware point cloud feature pooling
//Written by Shaoshuai Shi
//
Written by Shaoshuai Shi
//All Rights Reserved 2019.
//
All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#include <assert.h>
#include <math.h>
#include <math.h>
#include <stdio.h>
#include <stdio.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define DIVUP(m,
n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
// #define DEBUG
__device__
inline
void
lidar_to_local_coords
(
float
shift_x
,
float
shift_y
,
__device__
inline
void
lidar_to_local_coords
(
float
shift_x
,
float
shift_y
,
float
rz
,
float
&
local_x
,
float
&
local_y
){
float
rz
,
float
&
local_x
,
// should rotate pi/2 + alpha to translate LiDAR to local
float
&
local_y
)
{
float
rot_angle
=
rz
+
M_PI
/
2
;
// should rotate pi/2 + alpha to translate LiDAR to local
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
float
rot_angle
=
rz
+
M_PI
/
2
;
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
}
}
__device__
inline
int
check_pt_in_box3d
(
const
float
*
pt
,
const
float
*
box3d
,
__device__
inline
int
check_pt_in_box3d
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
){
float
&
local_x
,
float
&
local_y
)
{
// param pt: (x, y, z)
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
// bottom center
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
lidar_to_local_coords
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
lidar_to_local_coords
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
return
in_flag
;
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
return
in_flag
;
}
}
__global__
void
generate_pts_mask_for_box3d
(
int
boxes_num
,
int
pts_num
,
__global__
void
generate_pts_mask_for_box3d
(
int
boxes_num
,
int
pts_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
int
*
pts_mask
){
const
float
*
rois
,
const
float
*
pts
,
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
int
*
pts_mask
)
{
// params pts: (npoints, 3) [x, y, z]
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts_mask: (N, npoints): -1 means point doesnot in this box, otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
// params pts: (npoints, 3) [x, y, z]
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
// params pts_mask: (N, npoints): -1 means point doesnot in this box,
int
box_idx
=
blockIdx
.
y
;
// otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
if
(
pt_idx
>=
pts_num
||
box_idx
>=
boxes_num
)
return
;
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
y
;
pts
+=
pt_idx
*
3
;
if
(
pt_idx
>=
pts_num
||
box_idx
>=
boxes_num
)
return
;
rois
+=
box_idx
*
7
;
pts_mask
+=
box_idx
*
pts_num
+
pt_idx
;
pts
+=
pt_idx
*
3
;
rois
+=
box_idx
*
7
;
float
local_x
=
0
,
local_y
=
0
;
pts_mask
+=
box_idx
*
pts_num
+
pt_idx
;
int
cur_in_flag
=
check_pt_in_box3d
(
pts
,
rois
,
local_x
,
local_y
);
float
local_x
=
0
,
local_y
=
0
;
pts_mask
[
0
]
=
-
1
;
int
cur_in_flag
=
check_pt_in_box3d
(
pts
,
rois
,
local_x
,
local_y
);
if
(
cur_in_flag
>
0
){
float
local_z
=
pts
[
2
]
-
rois
[
2
];
pts_mask
[
0
]
=
-
1
;
float
w
=
rois
[
3
],
l
=
rois
[
4
],
h
=
rois
[
5
];
if
(
cur_in_flag
>
0
)
{
float
local_z
=
pts
[
2
]
-
rois
[
2
];
float
x_res
=
l
/
out_x
;
float
w
=
rois
[
3
],
l
=
rois
[
4
],
h
=
rois
[
5
];
float
y_res
=
w
/
out_y
;
float
z_res
=
h
/
out_z
;
float
x_res
=
l
/
out_x
;
float
y_res
=
w
/
out_y
;
unsigned
int
x_idx
=
int
((
local_x
+
l
/
2
)
/
x_res
);
float
z_res
=
h
/
out_z
;
unsigned
int
y_idx
=
int
((
local_y
+
w
/
2
)
/
y_res
);
unsigned
int
z_idx
=
int
(
local_z
/
z_res
);
unsigned
int
x_idx
=
int
((
local_x
+
l
/
2
)
/
x_res
);
unsigned
int
y_idx
=
int
((
local_y
+
w
/
2
)
/
y_res
);
x_idx
=
min
(
max
(
x_idx
,
0
),
out_x
-
1
);
unsigned
int
z_idx
=
int
(
local_z
/
z_res
);
y_idx
=
min
(
max
(
y_idx
,
0
),
out_y
-
1
);
z_idx
=
min
(
max
(
z_idx
,
0
),
out_z
-
1
);
x_idx
=
min
(
max
(
x_idx
,
0
),
out_x
-
1
);
y_idx
=
min
(
max
(
y_idx
,
0
),
out_y
-
1
);
unsigned
int
idx_encoding
=
(
x_idx
<<
16
)
+
(
y_idx
<<
8
)
+
z_idx
;
z_idx
=
min
(
max
(
z_idx
,
0
),
out_z
-
1
);
unsigned
int
idx_encoding
=
(
x_idx
<<
16
)
+
(
y_idx
<<
8
)
+
z_idx
;
#ifdef DEBUG
#ifdef DEBUG
printf
(
"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, %d), res(%.3f, %.3f, %.3f), idx_encoding=%x
\n
"
,
printf
(
pt_idx
,
pts
[
0
],
pts
[
1
],
pts
[
2
],
local_x
,
local_y
,
local_z
,
x_idx
,
y_idx
,
z_idx
,
x_res
,
y_res
,
z_res
,
idx_encoding
);
"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, "
"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x
\n
"
,
pt_idx
,
pts
[
0
],
pts
[
1
],
pts
[
2
],
local_x
,
local_y
,
local_z
,
x_idx
,
y_idx
,
z_idx
,
x_res
,
y_res
,
z_res
,
idx_encoding
);
#endif
#endif
pts_mask
[
0
]
=
idx_encoding
;
pts_mask
[
0
]
=
idx_encoding
;
}
}
}
}
__global__
void
collect_inside_pts_for_box3d
(
int
boxes_num
,
int
pts_num
,
__global__
void
collect_inside_pts_for_box3d
(
int
boxes_num
,
int
pts_num
,
int
max_pts_each_voxel
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_x
,
int
out_y
,
int
out_z
,
const
int
*
pts_mask
,
int
*
pts_idx_of_voxels
){
int
out_y
,
int
out_z
,
// params pts_mask: (N, npoints) 0 or 1
const
int
*
pts_mask
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int
*
pts_idx_of_voxels
)
{
// params pts_mask: (N, npoints) 0 or 1
int
box_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
if
(
box_idx
>=
boxes_num
)
return
;
int
box_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
max_num_pts
=
max_pts_each_voxel
-
1
;
// index 0 is the counter
if
(
box_idx
>=
boxes_num
)
return
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
;
int
max_num_pts
=
max_pts_each_voxel
-
1
;
// index 0 is the counter
for
(
int
k
=
0
;
k
<
pts_num
;
k
++
){
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
;
if
(
pts_mask
[
box_idx
*
pts_num
+
k
]
!=
-
1
){
unsigned
int
idx_encoding
=
pts_mask
[
box_idx
*
pts_num
+
k
];
for
(
int
k
=
0
;
k
<
pts_num
;
k
++
)
{
unsigned
int
x_idx
=
(
idx_encoding
>>
16
)
&
0xFF
;
if
(
pts_mask
[
box_idx
*
pts_num
+
k
]
!=
-
1
)
{
unsigned
int
y_idx
=
(
idx_encoding
>>
8
)
&
0xFF
;
unsigned
int
idx_encoding
=
pts_mask
[
box_idx
*
pts_num
+
k
];
unsigned
int
z_idx
=
idx_encoding
&
0xFF
;
unsigned
int
x_idx
=
(
idx_encoding
>>
16
)
&
0xFF
;
unsigned
int
base_offset
=
x_idx
*
out_y
*
out_z
*
max_pts_each_voxel
+
y_idx
*
out_z
*
max_pts_each_voxel
+
z_idx
*
max_pts_each_voxel
;
unsigned
int
y_idx
=
(
idx_encoding
>>
8
)
&
0xFF
;
unsigned
int
cnt
=
pts_idx_of_voxels
[
base_offset
];
unsigned
int
z_idx
=
idx_encoding
&
0xFF
;
if
(
cnt
<
max_num_pts
){
unsigned
int
base_offset
=
x_idx
*
out_y
*
out_z
*
max_pts_each_voxel
+
pts_idx_of_voxels
[
base_offset
+
cnt
+
1
]
=
k
;
y_idx
*
out_z
*
max_pts_each_voxel
+
pts_idx_of_voxels
[
base_offset
]
++
;
z_idx
*
max_pts_each_voxel
;
}
unsigned
int
cnt
=
pts_idx_of_voxels
[
base_offset
];
if
(
cnt
<
max_num_pts
)
{
pts_idx_of_voxels
[
base_offset
+
cnt
+
1
]
=
k
;
pts_idx_of_voxels
[
base_offset
]
++
;
}
#ifdef DEBUG
#ifdef DEBUG
printf
(
"collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x
\n
"
,
printf
(
"collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x
\n
"
,
k
,
x_idx
,
k
,
x_idx
,
y_idx
,
z_idx
,
idx_encoding
);
y_idx
,
z_idx
,
idx_encoding
);
#endif
#endif
}
}
}
}
}
}
__global__
void
roiaware_maxpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
__global__
void
roiaware_maxpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_y
,
int
out_z
,
const
float
*
pts_feature
,
const
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
*
argmax
){
int
out_z
,
const
float
*
pts_feature
,
// params pts_feature: (npoints, C)
const
int
*
pts_idx_of_voxels
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
float
*
pooled_features
,
int
*
argmax
)
{
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
// params argmax: (N, out_x, out_y, out_z, C)
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
#ifdef DEBUG
#ifdef DEBUG
printf
(
"src pts_idx_of_voxels: (%p, ), argmax: %p
\n
"
,
pts_idx_of_voxels
,
argmax
);
printf
(
"src pts_idx_of_voxels: (%p, ), argmax: %p
\n
"
,
pts_idx_of_voxels
,
argmax
);
#endif
#endif
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
offset_base
*
max_pts_each_voxel
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
int
argmax_idx
=
-
1
;
int
argmax_idx
=
-
1
;
float
max_val
=
-
1e50
;
float
max_val
=
-
1e50
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
int
total_pts
=
pts_idx_of_voxels
[
0
];
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
)
{
if
(
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
]
>
max_val
){
if
(
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
]
>
max_val
)
{
max_val
=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
max_val
=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
argmax_idx
=
pts_idx_of_voxels
[
k
];
argmax_idx
=
pts_idx_of_voxels
[
k
];
}
}
}
}
if
(
argmax_idx
!=
-
1
){
if
(
argmax_idx
!=
-
1
)
{
pooled_features
[
0
]
=
max_val
;
pooled_features
[
0
]
=
max_val
;
}
}
argmax
[
0
]
=
argmax_idx
;
argmax
[
0
]
=
argmax_idx
;
#ifdef DEBUG
#ifdef DEBUG
printf
(
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)
\n
"
,
printf
(
channel_idx
,
x_idx
,
y_idx
,
z_idx
,
argmax_idx
,
max_val
,
total_pts
,
pts_idx_of_voxels
,
argmax
,
argmax_idx
);
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after "
"pts_idx: %p, argmax: (%p, %d)
\n
"
,
channel_idx
,
x_idx
,
y_idx
,
z_idx
,
argmax_idx
,
max_val
,
total_pts
,
pts_idx_of_voxels
,
argmax
,
argmax_idx
);
#endif
#endif
}
}
__global__
void
roiaware_avgpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
__global__
void
roiaware_avgpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_y
,
int
out_z
,
const
float
*
pts_feature
,
const
int
*
pts_idx_of_voxels
,
float
*
pooled_features
){
int
out_z
,
const
float
*
pts_feature
,
// params pts_feature: (npoints, C)
const
int
*
pts_idx_of_voxels
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
float
*
pooled_features
)
{
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
// params argmax: (N, out_x, out_y, out_z, C)
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
y_idx
>=
out_y
||
z_idx
>=
out_z
)
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
return
;
float
sum_val
=
0
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
sum_val
+=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
offset_base
*
channels
+
channel_idx
;
}
float
sum_val
=
0
;
if
(
total_pts
>
0
){
int
total_pts
=
pts_idx_of_voxels
[
0
];
pooled_features
[
0
]
=
sum_val
/
total_pts
;
}
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
)
{
sum_val
+=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
}
if
(
total_pts
>
0
)
{
pooled_features
[
0
]
=
sum_val
/
total_pts
;
}
}
}
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
pts_feature
,
int
*
argmax
,
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
pool_method
){
const
float
*
pts_feature
,
int
*
argmax
,
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
int
pool_method
)
{
// params pts_feature: (npoints, C)
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params argmax: (N, out_x, out_y, out_z, C)
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pts_feature: (npoints, C)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
int
*
pts_mask
=
NULL
;
// params pool_method: 0: max_pool 1: avg_pool
cudaMalloc
(
&
pts_mask
,
boxes_num
*
pts_num
*
sizeof
(
int
));
// (N, M)
cudaMemset
(
pts_mask
,
-
1
,
boxes_num
*
pts_num
*
sizeof
(
int
));
int
*
pts_mask
=
NULL
;
cudaMalloc
(
&
pts_mask
,
boxes_num
*
pts_num
*
sizeof
(
int
));
// (N, M)
dim3
blocks_mask
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
boxes_num
);
cudaMemset
(
pts_mask
,
-
1
,
boxes_num
*
pts_num
*
sizeof
(
int
));
dim3
threads
(
THREADS_PER_BLOCK
);
generate_pts_mask_for_box3d
<<<
blocks_mask
,
threads
>>>
(
boxes_num
,
pts_num
,
out_x
,
out_y
,
out_z
,
rois
,
pts
,
pts_mask
);
dim3
blocks_mask
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
boxes_num
);
dim3
threads
(
THREADS_PER_BLOCK
);
// TODO: Merge the collect and pool functions, SS
generate_pts_mask_for_box3d
<<<
blocks_mask
,
threads
>>>
(
boxes_num
,
pts_num
,
out_x
,
out_y
,
out_z
,
rois
,
pts
,
pts_mask
);
dim3
blocks_collect
(
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK
));
collect_inside_pts_for_box3d
<<<
blocks_collect
,
threads
>>>
(
boxes_num
,
pts_num
,
max_pts_each_voxel
,
// TODO: Merge the collect and pool functions, SS
out_x
,
out_y
,
out_z
,
pts_mask
,
pts_idx_of_voxels
);
dim3
blocks_collect
(
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK
));
dim3
blocks_pool
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
boxes_num
);
collect_inside_pts_for_box3d
<<<
blocks_collect
,
threads
>>>
(
if
(
pool_method
==
0
){
boxes_num
,
pts_num
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_mask
,
roiaware_maxpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_idx_of_voxels
);
pts_feature
,
pts_idx_of_voxels
,
pooled_features
,
argmax
);
}
dim3
blocks_pool
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
else
if
(
pool_method
==
1
){
boxes_num
);
roiaware_avgpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
if
(
pool_method
==
0
)
{
pts_feature
,
pts_idx_of_voxels
,
pooled_features
);
roiaware_maxpool3d
<<<
blocks_pool
,
threads
>>>
(
}
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_feature
,
pts_idx_of_voxels
,
pooled_features
,
argmax
);
}
else
if
(
pool_method
==
1
)
{
cudaFree
(
pts_mask
);
roiaware_avgpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_feature
,
pts_idx_of_voxels
,
pooled_features
);
}
cudaFree
(
pts_mask
);
#ifdef DEBUG
#ifdef DEBUG
cudaDeviceSynchronize
();
// for using printf in kernel function
cudaDeviceSynchronize
();
// for using printf in kernel function
#endif
#endif
}
}
__global__
void
roiaware_maxpool3d_backward
(
int
boxes_num
,
int
channels
,
__global__
void
roiaware_maxpool3d_backward
(
int
boxes_num
,
int
channels
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_x
,
int
out_y
,
int
out_z
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
){
const
int
*
argmax
,
// params argmax: (N, out_x, out_y, out_z, C)
const
float
*
grad_out
,
// params grad_out: (N, out_x, out_y, out_z, C)
float
*
grad_in
)
{
// params grad_in: (npoints, C), return value
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
// params grad_in: (npoints, C), return value
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
y_idx
>=
out_y
||
z_idx
>=
out_z
)
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
return
;
if
(
argmax
[
0
]
==
-
1
)
return
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
atomicAdd
(
grad_in
+
argmax
[
0
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
1
);
offset_base
*
channels
+
channel_idx
;
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
if
(
argmax
[
0
]
==
-
1
)
return
;
atomicAdd
(
grad_in
+
argmax
[
0
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
1
);
}
}
__global__
void
roiaware_avgpool3d_backward
(
int
boxes_num
,
int
channels
,
__global__
void
roiaware_avgpool3d_backward
(
int
boxes_num
,
int
channels
,
int
out_x
,
int
out_y
,
int
out_z
,
int
out_x
,
int
out_y
,
int
out_z
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
float
*
grad_out
,
float
*
grad_in
){
int
max_pts_each_voxel
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
const
int
*
pts_idx_of_voxels
,
// params grad_out: (N, out_x, out_y, out_z, C)
const
float
*
grad_out
,
// params grad_in: (npoints, C), return value
float
*
grad_in
)
{
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int
box_idx
=
blockIdx
.
z
;
// params grad_out: (N, out_x, out_y, out_z, C)
int
channel_idx
=
blockIdx
.
y
;
// params grad_in: (npoints, C), return value
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
z
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
channel_idx
=
blockIdx
.
y
;
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
float
cur_grad
=
1
/
fmaxf
(
float
(
total_pts
),
1.0
);
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
offset_base
*
max_pts_each_voxel
;
atomicAdd
(
grad_in
+
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
cur_grad
);
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
}
offset_base
*
channels
+
channel_idx
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
float
cur_grad
=
1
/
fmaxf
(
float
(
total_pts
),
1.0
);
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
)
{
atomicAdd
(
grad_in
+
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
cur_grad
);
}
}
}
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
int
max_pts_each_voxel
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
,
int
pool_method
){
const
int
*
pts_idx_of_voxels
,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
const
int
*
argmax
,
const
float
*
grad_out
,
// params argmax: (N, out_x, out_y, out_z, C)
float
*
grad_in
,
int
pool_method
)
{
// params
grad_out
: (N, out_x, out_y, out_z,
C
)
// params
pts_idx_of_voxels
: (N, out_x, out_y, out_z,
max_pts_each_voxel
)
// params
grad_in: (npoints, C), return value
// params
argmax: (N, out_x, out_y, out_z, C)
// params
pool_method: 0: max_pool, 1: avg_pool
// params
grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
dim3
blocks
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
boxes_num
);
// params pool_method: 0: max_pool, 1: avg_pool
dim3
threads
(
THREADS_PER_BLOCK
);
if
(
pool_method
==
0
){
dim3
blocks
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
roiaware_maxpool3d_backward
<<<
blocks
,
threads
>>>
(
boxes_num
);
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
argmax
,
grad_out
,
grad_in
dim3
threads
(
THREADS_PER_BLOCK
);
);
if
(
pool_method
==
0
)
{
}
roiaware_maxpool3d_backward
<<<
blocks
,
threads
>>>
(
else
if
(
pool_method
==
1
){
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
argmax
,
grad_out
,
grad_in
);
roiaware_avgpool3d_backward
<<<
blocks
,
threads
>>>
(
}
else
if
(
pool_method
==
1
)
{
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
max_pts_each_voxel
,
pts_idx_of_voxels
,
grad_out
,
grad_in
roiaware_avgpool3d_backward
<<<
blocks
,
threads
>>>
(
);
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
max_pts_each_voxel
,
}
pts_idx_of_voxels
,
grad_out
,
grad_in
);
}
}
}
mmdet3d/ops/sparse_block.py
View file @
bce7d0c3
from
mmcv.cnn
import
build_norm_layer
from
mmcv.cnn
import
build_norm_layer
from
torch
import
nn
from
torch
import
nn
import
mmdet3d.ops.spconv
as
spconv
from
mmdet.models.backbones.resnet
import
BasicBlock
,
Bottleneck
from
mmdet.models.backbones.resnet
import
BasicBlock
,
Bottleneck
from
.
import
spconv
def
conv3x3
(
in_planes
,
out_planes
,
stride
=
1
,
indice_key
=
None
):
def
conv3x3
(
in_planes
,
out_planes
,
stride
=
1
,
indice_key
=
None
):
...
...
tests/test_samplers.py
0 → 100644
View file @
bce7d0c3
import
torch
from
mmdet3d.core.bbox.assigners
import
MaxIoUAssigner
from
mmdet3d.core.bbox.samplers
import
IoUNegPiecewiseSampler
def
test_iou_piecewise_sampler
():
assigner
=
MaxIoUAssigner
(
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
))
bboxes
=
torch
.
tensor
(
[[
32
,
32
,
16
,
8
,
38
,
42
,
-
0.3
],
[
32
,
32
,
16
,
8
,
38
,
42
,
-
0.3
],
[
32
,
32
,
16
,
8
,
38
,
42
,
-
0.3
],
[
32
,
32
,
16
,
8
,
38
,
42
,
-
0.3
],
[
0
,
0
,
0
,
10
,
10
,
10
,
0.2
],
[
10
,
10
,
10
,
20
,
20
,
15
,
0.6
],
[
5
,
5
,
5
,
15
,
15
,
15
,
0.7
],
[
5
,
5
,
5
,
15
,
15
,
15
,
0.7
],
[
5
,
5
,
5
,
15
,
15
,
15
,
0.7
],
[
32
,
32
,
16
,
8
,
38
,
42
,
-
0.3
],
[
32
,
32
,
16
,
8
,
38
,
42
,
-
0.3
],
[
32
,
32
,
16
,
8
,
38
,
42
,
-
0.3
]],
dtype
=
torch
.
float32
).
cuda
()
gt_bboxes
=
torch
.
tensor
(
[[
0
,
0
,
0
,
10
,
10
,
9
,
0.2
],
[
5
,
10
,
10
,
20
,
20
,
15
,
0.6
]],
dtype
=
torch
.
float32
).
cuda
()
gt_labels
=
torch
.
tensor
([
1
,
1
],
dtype
=
torch
.
int64
).
cuda
()
assign_result
=
assigner
.
assign
(
bboxes
,
gt_bboxes
,
gt_labels
=
gt_labels
)
sampler
=
IoUNegPiecewiseSampler
(
num
=
10
,
pos_fraction
=
0.55
,
neg_piece_fractions
=
[
0.8
,
0.2
],
neg_iou_piece_thrs
=
[
0.55
,
0.1
],
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
)
sample_result
=
sampler
.
sample
(
assign_result
,
bboxes
,
gt_bboxes
,
gt_labels
)
assert
sample_result
.
pos_inds
==
4
assert
len
(
sample_result
.
pos_bboxes
)
==
len
(
sample_result
.
pos_inds
)
assert
len
(
sample_result
.
neg_bboxes
)
==
len
(
sample_result
.
neg_inds
)
tools/dist_test.sh
0 → 100755
View file @
bce7d0c3
#!/usr/bin/env bash
CONFIG
=
$1
CHECKPOINT
=
$2
GPUS
=
$3
PORT
=
${
PORT
:-
29500
}
PYTHONPATH
=
"
$(
dirname
$0
)
/.."
:
$PYTHONPATH
\
python
-m
torch.distributed.launch
--nproc_per_node
=
$GPUS
--master_port
=
$PORT
\
$(
dirname
"
$0
"
)
/test.py
$CONFIG
$CHECKPOINT
--launcher
pytorch
${
@
:4
}
tools/dist_train.sh
View file @
bce7d0c3
#!/usr/bin/env bash
#!/usr/bin/env bash
PYTHON
=
${
PYTHON
:-
"python"
}
CONFIG
=
$1
CONFIG
=
$1
GPUS
=
$2
GPUS
=
$2
PORT
=
${
PORT
:-
29500
}
$PYTHON
-m
torch.distributed.launch
--nproc_per_node
=
$GPUS
\
PYTHONPATH
=
"
$(
dirname
$0
)
/.."
:
$PYTHONPATH
\
python
-m
torch.distributed.launch
--nproc_per_node
=
$GPUS
--master_port
=
$PORT
\
$(
dirname
"
$0
"
)
/train.py
$CONFIG
--launcher
pytorch
${
@
:3
}
$(
dirname
"
$0
"
)
/train.py
$CONFIG
--launcher
pytorch
${
@
:3
}
tools/slurm_test.sh
View file @
bce7d0c3
#!/usr/bin/env bash
#!/usr/bin/env bash
set
-x
set
-x
export
PYTHONPATH
=
`
pwd
`
:
$PYTHONPATH
PARTITION
=
$1
PARTITION
=
$1
JOB_NAME
=
$2
JOB_NAME
=
$2
...
@@ -9,14 +8,17 @@ CONFIG=$3
...
@@ -9,14 +8,17 @@ CONFIG=$3
CHECKPOINT
=
$4
CHECKPOINT
=
$4
GPUS
=
${
GPUS
:-
8
}
GPUS
=
${
GPUS
:-
8
}
GPUS_PER_NODE
=
${
GPUS_PER_NODE
:-
8
}
GPUS_PER_NODE
=
${
GPUS_PER_NODE
:-
8
}
CPUS_PER_TASK
=
${
CPUS_PER_TASK
:-
5
}
PY_ARGS
=
${
@
:5
}
PY_ARGS
=
${
@
:5
}
SRUN_ARGS
=
${
SRUN_ARGS
:-
""
}
SRUN_ARGS
=
${
SRUN_ARGS
:-
""
}
PYTHONPATH
=
"
$(
dirname
$0
)
/.."
:
$PYTHONPATH
\
srun
-p
${
PARTITION
}
\
srun
-p
${
PARTITION
}
\
--job-name
=
${
JOB_NAME
}
\
--job-name
=
${
JOB_NAME
}
\
--gres
=
gpu:
${
GPUS_PER_NODE
}
\
--gres
=
gpu:
${
GPUS_PER_NODE
}
\
--ntasks
=
${
GPUS
}
\
--ntasks
=
${
GPUS
}
\
--ntasks-per-node
=
${
GPUS_PER_NODE
}
\
--ntasks-per-node
=
${
GPUS_PER_NODE
}
\
--cpus-per-task
=
${
CPUS_PER_TASK
}
\
--kill-on-bad-exit
=
1
\
--kill-on-bad-exit
=
1
\
${
SRUN_ARGS
}
\
${
SRUN_ARGS
}
\
python
-u
tools/test.py
${
CONFIG
}
${
CHECKPOINT
}
--launcher
=
"slurm"
${
PY_ARGS
}
python
-u
tools/test.py
${
CONFIG
}
${
CHECKPOINT
}
--launcher
=
"slurm"
${
PY_ARGS
}
tools/slurm_train.sh
View file @
bce7d0c3
...
@@ -8,15 +8,17 @@ CONFIG=$3
...
@@ -8,15 +8,17 @@ CONFIG=$3
WORK_DIR
=
$4
WORK_DIR
=
$4
GPUS
=
${
GPUS
:-
8
}
GPUS
=
${
GPUS
:-
8
}
GPUS_PER_NODE
=
${
GPUS_PER_NODE
:-
8
}
GPUS_PER_NODE
=
${
GPUS_PER_NODE
:-
8
}
CPUS_PER_TASK
=
${
CPUS_PER_TASK
:-
5
}
SRUN_ARGS
=
${
SRUN_ARGS
:-
""
}
SRUN_ARGS
=
${
SRUN_ARGS
:-
""
}
PY_ARGS
=
${
PY_ARGS
:-
"--validate"
}
PY_ARGS
=
${
@
:5
}
PYTHONPATH
=
"
$(
dirname
$0
)
/.."
:
$PYTHONPATH
\
srun
-p
${
PARTITION
}
\
srun
-p
${
PARTITION
}
\
--job-name
=
${
JOB_NAME
}
\
--job-name
=
${
JOB_NAME
}
\
--gres
=
gpu:
${
GPUS_PER_NODE
}
\
--gres
=
gpu:
${
GPUS_PER_NODE
}
\
--ntasks
=
${
GPUS
}
\
--ntasks
=
${
GPUS
}
\
--ntasks-per-node
=
${
GPUS_PER_NODE
}
\
--ntasks-per-node
=
${
GPUS_PER_NODE
}
\
--cpus-per-task
=
${
CPUS_PER_TASK
}
\
--kill-on-bad-exit
=
1
\
--kill-on-bad-exit
=
1
\
${
SRUN_ARGS
}
\
${
SRUN_ARGS
}
\
python
-u
tools/train.py
${
CONFIG
}
--work-dir
=
${
WORK_DIR
}
--launcher
=
"slurm"
${
PY_ARGS
}
python
-u
tools/train.py
${
CONFIG
}
--work-dir
=
${
WORK_DIR
}
--launcher
=
"slurm"
${
PY_ARGS
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment