OpenDAS / mmdetection3d · Commits

Commit f2b01720, authored Jun 10, 2020 by liyinhao

    Merge branch 'master' into process_raw_data

Parents: 08c8adb6, 47850641
Changes: 40

Showing 20 changed files with 1137 additions and 535 deletions (+1137 / -535)
  mmdet3d/core/bbox/structures/cam_box3d.py                        +47   -18
  mmdet3d/core/bbox/structures/depth_box3d.py                     +203    -0
  mmdet3d/core/bbox/structures/lidar_box3d.py                      +83   -20
  mmdet3d/core/optimizer/cocktail_constructor.py                    +1    -1
  mmdet3d/core/optimizer/cocktail_optimizer.py                      +9    -2
  mmdet3d/datasets/kitti_dataset.py                                 +6    -5
  mmdet3d/datasets/nuscenes_dataset.py                              +8    -5
  mmdet3d/datasets/pipelines/formating.py                          +11    -3
  mmdet3d/datasets/pipelines/train_aug.py                          +21   -34
  mmdet3d/models/dense_heads/train_mixins.py                        +2    -0
  mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py           +2    -4
  mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py   +28   -17
  mmdet3d/models/roi_heads/part_aggregation_roi_head.py             +5    -5
  mmdet3d/models/voxel_encoders/__init__.py                         +4    -5
  mmdet3d/models/voxel_encoders/pillar_encoder.py                  +40  -184
  mmdet3d/models/voxel_encoders/utils.py                           +13   -31
  mmdet3d/models/voxel_encoders/voxel_encoder.py                  +152  -180
  tests/test_box3d.py                                             +243   -12
  tests/test_pipeline/test_outdoor_pipeline.py                    +242    -0
  tests/test_semantic_heads.py                                     +17    -9
mmdet3d/core/bbox/structures/cam_box3d.py (+47 / -18)

@@ -11,10 +11,10 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
     Coordinates in camera:

     .. code-block:: none

-                z front
+                z front (yaw=0.5*pi)
               /
              /
-       0 ------> x right
+       0 ------> x right (yaw=0)
        |
        |
        v

@@ -22,11 +22,15 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
     The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
     and the yaw is around the y axis, thus the rotation axis=1.
+    The yaw is 0 at the positive direction of x axis, and increases from
+    the positive direction of x to the positive direction of z.

     Attributes:
         tensor (torch.Tensor): float matrix of N x box_dim.
         box_dim (int): integer indicates the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
         with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
             boxes.
     """

     @property

@@ -75,7 +79,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         """Calculate the coordinates of corners of all the boxes.

         Convert the boxes to in clockwise order, in the form of
-        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
+        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)

         .. code-block:: none

@@ -85,7 +89,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         (x0, y0, z1) + ----------- + (x1, y0, z1)
                     /|            / |
                    / |           /  |
-    (x0, y0, z0) + ----------- +   + (x1, y1, z0)
+    (x0, y0, z0) + ----------- +   + (x1, y1, z1)
                  |  /      .   |  /
                  | / origin    | /
     (x0, y1, z0) + ----------- + -------> x right

@@ -123,7 +127,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         return self.tensor[:, [0, 2, 3, 5, 6]]

     @property
-    def nearset_bev(self):
+    def nearest_bev(self):
         """Calculate the 2D bounding boxes in BEV without rotation

         Returns:

@@ -150,11 +154,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         """Calculate whether the points is in any of the boxes

         Args:
-            angles (float | torch.Tensor): rotation angle
-
-        Returns:
-            None if `return_rot_mat=False`,
-                torch.Tensor if `return_rot_mat=True`
+            angle (float | torch.Tensor): rotation angle
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)

@@ -166,13 +166,23 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
         self.tensor[:, 6] += angle

-    def flip(self):
-        """Flip the boxes in horizontal direction
+    def flip(self, bev_direction='horizontal'):
+        """Flip the boxes in BEV along given BEV direction

-        In CAM coordinates, it flips the x axis.
+        In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
+
+        Args:
+            bev_direction (str): Flip direction (horizontal or vertical).
         """
-        self.tensor[:, 0::7] = -self.tensor[:, 0::7]
-        self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        assert bev_direction in ('horizontal', 'vertical')
+        if bev_direction == 'horizontal':
+            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        elif bev_direction == 'vertical':
+            self.tensor[:, 2::7] = -self.tensor[:, 2::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6]

     def in_range_bev(self, box_range):
         """Check whether the boxes are in the given range

@@ -188,8 +198,8 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         TODO: check whether this will affect the performance

         Returns:
-            a binary vector, indicating whether each box is inside
-                the reference range.
+            torch.Tensor: Indicating whether each box is inside
+                the reference range.
         """
         in_range_flags = ((self.tensor[:, 0] > box_range[0])
                           & (self.tensor[:, 2] > box_range[1])

@@ -230,3 +240,22 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)
         overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
         return overlaps_h
+
+    def convert_to(self, dst, rt_mat=None):
+        """Convert self to `dst` mode.
+
+        Args:
+            dst (BoxMode): the target Box mode
+            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
+                matrix between different coordinates. Defaults to None.
+                The conversion from `src` coordinates to `dst` coordinates
+                usually comes along the change of sensors, e.g., from camera
+                to LiDAR. This requires a transformation matrix.
+
+        Returns:
+            BaseInstance3DBoxes: The converted box of the same type
+                in the `dst` mode.
+        """
+        from .box_3d_mode import Box3DMode
+        return Box3DMode.convert(
+            box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
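Note: the behavioral change in this file is that flip() is now direction-aware and yaw-aware. A minimal, self-contained sketch of the same logic on a raw N x 7 tensor (the helper name flip_cam_boxes and the sample box are illustrative, not part of the commit):

import numpy as np
import torch

def flip_cam_boxes(boxes, bev_direction='horizontal', with_yaw=True):
    # Mirrors CameraInstance3DBoxes.flip on a plain (N, 7) tensor:
    # columns are (x, y, z, x_size, y_size, z_size, yaw).
    assert bev_direction in ('horizontal', 'vertical')
    if bev_direction == 'horizontal':
        boxes[:, 0] = -boxes[:, 0]  # mirror x (camera right)
        if with_yaw:
            boxes[:, 6] = -boxes[:, 6] + np.pi
    else:
        boxes[:, 2] = -boxes[:, 2]  # mirror z (camera front)
        if with_yaw:
            boxes[:, 6] = -boxes[:, 6]
    return boxes

boxes = torch.tensor([[1.0, 1.5, 8.0, 1.6, 1.5, 4.0, 0.3]])
print(flip_cam_boxes(boxes.clone(), 'vertical'))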
mmdet3d/core/bbox/structures/depth_box3d.py (new file, 0 → 100644, +203 / -0)

import numpy as np
import torch

from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis


class DepthInstance3DBoxes(BaseInstance3DBoxes):
    """3D boxes of instances in Depth coordinates

    Coordinates in Depth:

    .. code-block:: none

        up z    y front (yaw=0.5*pi)
           ^   ^
           |  /
           | /
           0 ------> x right (yaw=0)

    The relative coordinate of bottom center in a Depth box is [0.5, 0.5, 0],
    and the yaw is around the z axis, thus the rotation axis=2.
    The yaw is 0 at the positive direction of x axis, and increases from
    the positive direction of x to the positive direction of y.

    Attributes:
        tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): integer indicates the dimension of a box
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
        with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
            boxes.
    """

    @property
    def gravity_center(self):
        """Calculate the gravity center of all the boxes.

        Returns:
            torch.Tensor: a tensor with center of each box.
        """
        bottom_center = self.bottom_center
        gravity_center = torch.zeros_like(bottom_center)
        gravity_center[:, :2] = bottom_center[:, :2]
        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
        return gravity_center

    @property
    def corners(self):
        """Calculate the coordinates of corners of all the boxes.

        Convert the boxes to corners in clockwise order, in form of
        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)

        .. code-block:: none

                                up z
                 front y           ^
                      /            |
                     /             |
       (x0, y1, z1) + ----------- + (x1, y1, z1)
                   /|            / |
                  / |           /  |
    (x0, y0, z1) + ----------- +   + (x1, y1, z0)
                 |  /      .   |  /
                 | / origin    | /
    (x0, y0, z0) + ----------- + --------> right x
                               (x1, y0, z0)

        Returns:
            torch.Tensor: corners of each box with size (N, 8, 3)
        """
        dims = self.dims
        corners_norm = torch.from_numpy(
            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
                device=dims.device, dtype=dims.dtype)

        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
        # use relative origin [0.5, 0.5, 0]
        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])

        # rotate around z axis
        corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)
        corners += self.tensor[:, :3].view(-1, 1, 3)
        return corners

    @property
    def bev(self):
        """Calculate the 2D bounding boxes in BEV with rotation

        Returns:
            torch.Tensor: a nx5 tensor of 2D BEV box of each box.
                The box is in XYWHR format.
        """
        return self.tensor[:, [0, 1, 3, 4, 6]]

    @property
    def nearest_bev(self):
        """Calculate the 2D bounding boxes in BEV without rotation

        Returns:
            torch.Tensor: a tensor of 2D BEV box of each box.
        """
        # Obtain BEV boxes with rotation in XYWHR format
        bev_rotated_boxes = self.bev
        # convert the rotation to a valid range
        rotations = bev_rotated_boxes[:, -1]
        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))

        # find the center of boxes
        conditions = (normed_rotations > np.pi / 4)[..., None]
        bboxes_xywh = torch.where(conditions,
                                  bev_rotated_boxes[:, [0, 1, 3, 2]],
                                  bev_rotated_boxes[:, :4])

        centers = bboxes_xywh[:, :2]
        dims = bboxes_xywh[:, 2:]
        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2],
                              dim=-1)
        return bev_boxes

    def rotate(self, angle):
        """Calculate whether the points is in any of the boxes

        Args:
            angle (float | torch.Tensor): rotation angle
        """
        if not isinstance(angle, torch.Tensor):
            angle = self.tensor.new_tensor(angle)
        rot_sin = torch.sin(angle)
        rot_cos = torch.cos(angle)
        rot_mat = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
                                          [rot_sin, rot_cos, 0],
                                          [0, 0, 1]])
        self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat.T
        if self.with_yaw:
            self.tensor[:, 6] -= angle
        else:
            corners_rot = self.corners @ rot_mat.T
            new_x_size = corners_rot[..., 0].max(
                dim=1, keepdim=True)[0] - corners_rot[..., 0].min(
                    dim=1, keepdim=True)[0]
            new_y_size = corners_rot[..., 1].max(
                dim=1, keepdim=True)[0] - corners_rot[..., 1].min(
                    dim=1, keepdim=True)[0]
            self.tensor[:, 3:5] = torch.cat((new_x_size, new_y_size), dim=-1)

    def flip(self, bev_direction='horizontal'):
        """Flip the boxes in BEV along given BEV direction

        In Depth coordinates, it flips x (horizontal) or y (vertical) axis.

        Args:
            bev_direction (str): Flip direction (horizontal or vertical).
        """
        assert bev_direction in ('horizontal', 'vertical')
        if bev_direction == 'horizontal':
            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
            if self.with_yaw:
                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
        elif bev_direction == 'vertical':
            self.tensor[:, 1::7] = -self.tensor[:, 1::7]
            if self.with_yaw:
                self.tensor[:, 6] = -self.tensor[:, 6]

    def in_range_bev(self, box_range):
        """Check whether the boxes are in the given range

        Args:
            box_range (list | torch.Tensor): the range of box
                (x_min, y_min, x_max, y_max)

        Note:
            In the original implementation of SECOND, checking whether
            a box in the range checks whether the points are in a convex
            polygon, we try to reduce the burden for simpler cases.
            TODO: check whether this will affect the performance

        Returns:
            torch.Tensor: Indicating whether each box is inside
                the reference range.
        """
        in_range_flags = ((self.tensor[:, 0] > box_range[0])
                          & (self.tensor[:, 1] > box_range[1])
                          & (self.tensor[:, 0] < box_range[2])
                          & (self.tensor[:, 1] < box_range[3]))
        return in_range_flags

    def convert_to(self, dst, rt_mat=None):
        """Convert self to `dst` mode.

        Args:
            dst (BoxMode): the target Box mode
            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
                matrix between different coordinates. Defaults to None.
                The conversion from `src` coordinates to `dst` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.

        Returns:
            BaseInstance3DBoxes: The converted box of the same type
                in the `dst` mode.
        """
        from .box_3d_mode import Box3DMode
        return Box3DMode.convert(
            box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
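The gravity_center property in this new file is simply the bottom center lifted by half the box height (index 5 is z_size). A tiny self-contained check of that arithmetic, with made-up values:

import torch

tensor = torch.tensor([[1.0, 2.0, 0.0, 0.5, 0.5, 2.0, 0.0]])  # one (N, 7) box
gravity_center = tensor[:, :3].clone()
gravity_center[:, 2] += tensor[:, 5] * 0.5  # bottom z + half height
print(gravity_center)  # tensor([[1., 2., 1.]])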
mmdet3d/core/bbox/structures/lidar_box3d.py (+83 / -20)

 import numpy as np
 import torch

+from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
 from .base_box3d import BaseInstance3DBoxes
 from .utils import limit_period, rotation_3d_in_axis

@@ -11,19 +12,23 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
     Coordinates in LiDAR:

     .. code-block:: none

-                    up z    x front
-                       ^   ^
-                       |  /
-                       | /
-       left y <------ 0
+                    up z    x front (yaw=0.5*pi)
+                       ^   ^
+                       |  /
+                       | /
+       (yaw=pi)  left y <------ 0

     The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
     and the yaw is around the z axis, thus the rotation axis=2.
+    The yaw is 0 at the negative direction of y axis, and increases from
+    the negative direction of y to the positive direction of x.

     Attributes:
         tensor (torch.Tensor): float matrix of N x box_dim.
         box_dim (int): integer indicates the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
         with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
             boxes.
     """

     @property

@@ -44,7 +49,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         """Calculate the coordinates of corners of all the boxes.

         Convert the boxes to corners in clockwise order, in form of
-        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
+        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)

         .. code-block:: none

@@ -90,7 +95,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         return self.tensor[:, [0, 1, 3, 4, 6]]

     @property
-    def nearset_bev(self):
+    def nearest_bev(self):
         """Calculate the 2D bounding boxes in BEV without rotation

         Returns:

@@ -117,11 +122,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         """Calculate whether the points is in any of the boxes

         Args:
-            angles (float | torch.Tensor): rotation angle
-
-        Returns:
-            None if `return_rot_mat=False`,
-                torch.Tensor if `return_rot_mat=True`
+            angle (float | torch.Tensor): rotation angle
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)

@@ -133,13 +134,27 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
         self.tensor[:, 6] += angle

-    def flip(self):
-        """Flip the boxes in horizontal direction
+        if self.tensor.shape[1] == 9:
+            # rotate velo vector
+            self.tensor[:, 7:9] = self.tensor[:, 7:9] @ rot_mat_T[:2, :2]

-        In LIDAR coordinates, it flips the y axis.
+    def flip(self, bev_direction='horizontal'):
+        """Flip the boxes in BEV along given BEV direction
+
+        In LIDAR coordinates, it flips the y (horizontal) or x (vertical) axis.
+
+        Args:
+            bev_direction (str): Flip direction (horizontal or vertical).
         """
-        self.tensor[:, 1::7] = -self.tensor[:, 1::7]
-        self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        assert bev_direction in ('horizontal', 'vertical')
+        if bev_direction == 'horizontal':
+            self.tensor[:, 1::7] = -self.tensor[:, 1::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        elif bev_direction == 'vertical':
+            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6]

     def in_range_bev(self, box_range):
         """Check whether the boxes are in the given range

@@ -155,11 +170,59 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         TODO: check whether this will affect the performance

         Returns:
-            a binary vector, indicating whether each box is inside
-                the reference range.
+            torch.Tensor: Indicating whether each box is inside
+                the reference range.
         """
         in_range_flags = ((self.tensor[:, 0] > box_range[0])
                           & (self.tensor[:, 1] > box_range[1])
                           & (self.tensor[:, 0] < box_range[2])
                           & (self.tensor[:, 1] < box_range[3]))
         return in_range_flags
+
+    def convert_to(self, dst, rt_mat=None):
+        """Convert self to `dst` mode.
+
+        Args:
+            dst (BoxMode): the target Box mode
+            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
+                matrix between different coordinates. Defaults to None.
+                The conversion from `src` coordinates to `dst` coordinates
+                usually comes along the change of sensors, e.g., from camera
+                to LiDAR. This requires a transformation matrix.
+
+        Returns:
+            BaseInstance3DBoxes: The converted box of the same type
+                in the `dst` mode.
+        """
+        from .box_3d_mode import Box3DMode
+        return Box3DMode.convert(
+            box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
+
+    def enlarged_box(self, extra_width):
+        """Enlarge the length, width and height boxes
+
+        Args:
+            extra_width (float | torch.Tensor): extra width to enlarge the box
+
+        Returns:
+            :obj:`LiDARInstance3DBoxes`: enlarged boxes
+        """
+        enlarged_boxes = self.tensor.clone()
+        enlarged_boxes[:, 3:6] += extra_width * 2
+        # bottom center z minus extra_width
+        enlarged_boxes[:, 2] -= extra_width
+        return self.new_box(enlarged_boxes)
+
+    def points_in_boxes(self, points):
+        """Find the box which the points are in.
+
+        Args:
+            points (torch.Tensor): Points in shape Nx3
+
+        Returns:
+            torch.Tensor: The index of box where each point are in.
+        """
+        box_idx = points_in_boxes_gpu(
+            points.unsqueeze(0),
+            self.tensor.unsqueeze(0).to(points.device)).squeeze(0)
+        return box_idx
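enlarged_box grows every dimension by extra_width on both sides, so the bottom-center z has to drop by extra_width for the enlarged box to stay centered on the original. A quick sketch on a raw tensor (the helper name is hypothetical):

import torch

def enlarge_lidar_boxes(boxes, extra_width):
    # Mirrors LiDARInstance3DBoxes.enlarged_box on a plain (N, 7) tensor.
    enlarged = boxes.clone()
    enlarged[:, 3:6] += extra_width * 2  # grow dx, dy, dz on both sides
    enlarged[:, 2] -= extra_width        # keep the box centered vertically
    return enlarged

boxes = torch.tensor([[0.0, 0.0, -1.0, 4.0, 1.8, 1.6, 0.0]])
print(enlarge_lidar_boxes(boxes, 0.2))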
mmdet3d/core/optimizer/cocktail_constructor.py (+1 / -1)

-from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
 from mmcv.utils import build_from_cfg

 from mmdet3d.utils import get_root_logger
+from mmdet.core.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
 from .cocktail_optimizer import CocktailOptimizer
mmdet3d/core/optimizer/cocktail_optimizer.py (+9 / -2)

-from mmcv.runner.optimizer import OPTIMIZERS
 from torch.optim import Optimizer

+from mmdet.core.optimizer import OPTIMIZERS
+

 @OPTIMIZERS.register_module()
 class CocktailOptimizer(Optimizer):

@@ -9,6 +8,11 @@ class CocktailOptimizer(Optimizer):
     This optimizer applies the cocktail optimization for multi-modality models.

+    Args:
+        optimizers (list[:obj:`torch.optim.Optimizer`]): The list containing
+            different optimizers that optimize different parameters
+        step_intervals (list[int]): Step intervals of each optimizer
     """

     def __init__(self, optimizers, step_intervals=None):

@@ -18,6 +22,9 @@ class CocktailOptimizer(Optimizer):
             self.param_groups += optimizer.param_groups

+        if not isinstance(step_intervals, list):
+            step_intervals = [1] * len(self.optimizers)
+        assert len(step_intervals) == len(optimizers), \
+            '"step_intervals" should contain the same number of intervals as' \
+            f' len(optimizers)={len(optimizers)}, got {step_intervals}'
         self.step_intervals = step_intervals
         self.num_step_updated = 0
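The new guard normalizes step_intervals before the length check. A minimal sketch of that default path, with stand-in strings instead of real torch.optim.Optimizer objects:

optimizers = ['opt_a', 'opt_b']  # stand-ins for wrapped optimizers
step_intervals = None

if not isinstance(step_intervals, list):
    step_intervals = [1] * len(optimizers)  # default: step every iteration
assert len(step_intervals) == len(optimizers)
print(step_intervals)  # [1, 1]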
mmdet3d/datasets/kitti_dataset.py (+6 / -5)

@@ -9,7 +9,7 @@
 import torch
 from mmcv.utils import print_log

 from mmdet.datasets import DATASETS
-from ..core.bbox import box_np_ops
+from ..core.bbox import Box3DMode, CameraInstance3DBoxes, box_np_ops
 from .custom_3d import Custom3DDataset
 from .utils import remove_dontcare

@@ -87,13 +87,14 @@ class KittiDataset(Custom3DDataset):
         # print(gt_names, len(loc))
         gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                       axis=1).astype(np.float32)
-        # this change gt_bboxes_3d to velodyne coordinates
-        gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
-                                                      Trv2c)
+        # convert gt_bboxes_3d to velodyne coordinates
+        gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
+            Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
         gt_bboxes = annos['bbox']

         selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
-        gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
+        # gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
         gt_bboxes = gt_bboxes[selected].astype('float32')
         gt_names = gt_names[selected]
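The rt_mat fed to convert_to here, np.linalg.inv(rect @ Trv2c), undoes the rectification and velodyne-to-camera transforms in one matrix. A toy sketch with identity calibration matrices (real KITTI calib values differ):

import numpy as np

rect = np.eye(4)   # stand-in for the 4x4 rectification matrix
Trv2c = np.eye(4)  # stand-in for the 4x4 velodyne-to-camera matrix
cam_to_lidar = np.linalg.inv(rect @ Trv2c)

pt_cam = np.array([1.0, 2.0, 3.0, 1.0])  # homogeneous camera-frame point
print(cam_to_lidar @ pt_cam)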
mmdet3d/datasets/nuscenes_dataset.py (+8 / -5)

@@ -7,7 +7,7 @@
 import pyquaternion
 from nuscenes.utils.data_classes import Box as NuScenesBox

 from mmdet.datasets import DATASETS
-from ..core.bbox import box_np_ops
+from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
 from .custom_3d import Custom3DDataset

@@ -152,10 +152,6 @@ class NuScenesDataset(Custom3DDataset):
         # filter out bbox containing no points
         mask = info['num_lidar_pts'] > 0
         gt_bboxes_3d = info['gt_boxes'][mask]
-        # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
-        # the same as KITTI [0.5, 0.5, 0]
-        box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
-                                        [0.5, 0.5, 0])
         gt_names_3d = info['gt_names'][mask]
         gt_labels_3d = []
         for cat in gt_names_3d:

@@ -171,6 +167,13 @@ class NuScenesDataset(Custom3DDataset):
             gt_velocity[nan_mask] = [0.0, 0.0]
             gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity],
                                           axis=-1)

+        # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
+        # the same as KITTI [0.5, 0.5, 0]
+        gt_bboxes_3d = LiDARInstance3DBoxes(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            origin=[0.5, 0.5, 0.5])
+
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
             gt_labels_3d=gt_labels_3d,
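Passing origin=[0.5, 0.5, 0.5] declares that the incoming nuScenes z is the gravity center, letting the box structure shift to the KITTI-style bottom-center convention internally. A sketch of that shift on a raw array (assuming the shift works as the comment describes; the exact code lives in the box base class):

import numpy as np

boxes = np.array([[10.0, 5.0, 0.0, 4.0, 1.8, 1.6, 0.3]])  # (x, y, z, dx, dy, dz, yaw)
src_origin = np.array([0.5, 0.5, 0.5])  # nuScenes: gravity center
dst_origin = np.array([0.5, 0.5, 0.0])  # KITTI: bottom center
boxes[:, :3] += boxes[:, 3:6] * (dst_origin - src_origin)
print(boxes[0, 2])  # z drops by half the height: -0.8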
mmdet3d/datasets/pipelines/formating.py (+11 / -3)

 import numpy as np
 from mmcv.parallel import DataContainer as DC

+from mmdet3d.core.bbox import BaseInstance3DBoxes
 from mmdet.datasets.builder import PIPELINES
 from mmdet.datasets.pipelines import to_tensor

@@ -39,9 +40,8 @@ class DefaultFormatBundle(object):
             img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
             results['img'] = DC(to_tensor(img), stack=True)
         for key in [
-                'proposals', 'gt_bboxes', 'gt_bboxes_3d', 'gt_bboxes_ignore',
-                'gt_labels', 'gt_labels_3d', 'pts_instance_mask',
-                'pts_semantic_mask'
+                'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
+                'gt_labels_3d', 'pts_instance_mask', 'pts_semantic_mask'
         ]:
             if key not in results:
                 continue

@@ -49,6 +49,14 @@ class DefaultFormatBundle(object):
                 results[key] = DC([to_tensor(res) for res in results[key]])
             else:
                 results[key] = DC(to_tensor(results[key]))
+        if 'gt_bboxes_3d' in results:
+            if isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):
+                results['gt_bboxes_3d'] = DC(
+                    results['gt_bboxes_3d'], cpu_only=True)
+            else:
+                results['gt_bboxes_3d'] = DC(
+                    to_tensor(results['gt_bboxes_3d']))
+
         if 'gt_masks' in results:
             results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
         if 'gt_semantic_seg' in results:
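Box structures carry their data in a .tensor attribute and define extra methods, so they are wrapped with cpu_only=True instead of being converted to plain tensors. A minimal sketch, assuming mmcv is installed (the random tensor stands in for a real box structure):

import torch
from mmcv.parallel import DataContainer as DC

gt_bboxes_3d = torch.rand(5, 7)          # stand-in for a BaseInstance3DBoxes
wrapped = DC(gt_bboxes_3d, cpu_only=True)
print(wrapped.cpu_only)                  # True: collate leaves it on CPU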
mmdet3d/datasets/pipelines/train_aug.py (+21 / -34)

@@ -26,12 +26,8 @@ class RandomFlip3D(RandomFlip):
         self.sync_2d = sync_2d

     def random_flip_points(self, gt_bboxes_3d, points):
-        gt_bboxes_3d[:, 1] = -gt_bboxes_3d[:, 1]
-        gt_bboxes_3d[:, 6] = -gt_bboxes_3d[:, 6] + np.pi
+        gt_bboxes_3d.flip()
         points[:, 1] = -points[:, 1]
-        if gt_bboxes_3d.shape[1] == 9:
-            # flip velocitys at the same time
-            gt_bboxes_3d[:, 8] = -gt_bboxes_3d[:, 8]
         return gt_bboxes_3d, points

     def __call__(self, input_dict):

@@ -121,10 +117,13 @@ class ObjectSample(object):
             gt_bboxes_2d = input_dict['gt_bboxes']
             # Assume for now 3D & 2D bboxes are the same
             sampled_dict = self.db_sampler.sample_all(
-                gt_bboxes_3d, gt_labels_3d, gt_bboxes_2d=gt_bboxes_2d,
-                img=img)
+                gt_bboxes_3d.tensor.numpy(),
+                gt_labels_3d,
+                gt_bboxes_2d=gt_bboxes_2d,
+                img=img)
         else:
             sampled_dict = self.db_sampler.sample_all(
-                gt_bboxes_3d, gt_labels_3d, img=None)
+                gt_bboxes_3d.tensor.numpy(), gt_labels_3d, img=None)

         if sampled_dict is not None:
             sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']

@@ -133,8 +132,9 @@ class ObjectSample(object):
             gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
                                           axis=0)
-            gt_bboxes_3d = np.concatenate(
-                [gt_bboxes_3d, sampled_gt_bboxes_3d]).astype(np.float32)
+            gt_bboxes_3d = gt_bboxes_3d.new_box(
+                np.concatenate(
+                    [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))

             points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
             # check the points dimension

@@ -178,14 +178,16 @@ class ObjectNoise(object):
         points = input_dict['points']

         # TODO: check this inplace function
+        numpy_box = gt_bboxes_3d.tensor.numpy()
         noise_per_object_v3_(
-            gt_bboxes_3d,
+            numpy_box,
             points,
             rotation_perturb=self.rot_uniform_noise,
             center_noise_std=self.loc_noise_std,
             global_random_rot_range=self.global_rot_range,
             num_try=self.num_try)

-        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
+        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)
         input_dict['points'] = points
         return input_dict

@@ -212,7 +214,7 @@ class GlobalRotScale(object):
     def _trans_bbox_points(self, gt_boxes, points):
         noise_trans = np.random.normal(0, self.trans_normal_noise[0], 3).T
         points[:, :3] += noise_trans
-        gt_boxes[:, :3] += noise_trans
+        gt_boxes.translate(noise_trans)
         return gt_boxes, points, noise_trans

     def _rot_bbox_points(self, gt_boxes, points, rotation=np.pi / 4):

@@ -221,16 +223,8 @@ class GlobalRotScale(object):
         noise_rotation = np.random.uniform(rotation[0], rotation[1])
         points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle(
             points[:, :3], noise_rotation, axis=2)
-        gt_boxes[:, :3], _ = box_np_ops.rotation_points_single_angle(
-            gt_boxes[:, :3], noise_rotation, axis=2)
-        gt_boxes[:, 6] += noise_rotation
-        if gt_boxes.shape[1] == 9:
-            # rotate velo vector
-            rot_cos = np.cos(noise_rotation)
-            rot_sin = np.sin(noise_rotation)
-            rot_mat_T_bev = np.array(
-                [[rot_cos, -rot_sin], [rot_sin, rot_cos]],
-                dtype=points.dtype)
-            gt_boxes[:, 7:9] = gt_boxes[:, 7:9] @ rot_mat_T_bev
+        gt_boxes.rotate(noise_rotation)
         return gt_boxes, points, rot_mat_T

     def _scale_bbox_points(self,

@@ -240,9 +234,7 @@ class GlobalRotScale(object):
                            max_scale=1.05):
         noise_scale = np.random.uniform(min_scale, max_scale)
         points[:, :3] *= noise_scale
-        gt_boxes[:, :6] *= noise_scale
-        if gt_boxes.shape[1] == 9:
-            gt_boxes[:, 7:] *= noise_scale
+        gt_boxes.scale(noise_scale)
         return gt_boxes, points, noise_scale

     def __call__(self, input_dict):

@@ -256,7 +248,7 @@ class GlobalRotScale(object):
         gt_bboxes_3d, points, trans_factor = self._trans_bbox_points(
             gt_bboxes_3d, points)

-        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
+        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
         input_dict['points'] = points
         input_dict['pcd_scale_factor'] = scale_factor
         input_dict['pcd_rotation'] = rotation_factor

@@ -290,10 +282,6 @@ class ObjectRangeFilter(object):
         self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
         self.bev_range = self.pcd_range[[0, 1, 3, 4]]

-    @staticmethod
-    def limit_period(val, offset=0.5, period=np.pi):
-        return val - np.floor(val / period + offset) * period
-
     @staticmethod
     def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
         """remove gtbox outside training range.

@@ -314,14 +302,13 @@ class ObjectRangeFilter(object):
     def __call__(self, input_dict):
         gt_bboxes_3d = input_dict['gt_bboxes_3d']
         gt_labels_3d = input_dict['gt_labels_3d']
-        mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range)
+        mask = gt_bboxes_3d.in_range_bev(self.bev_range)
         gt_bboxes_3d = gt_bboxes_3d[mask]
         gt_labels_3d = gt_labels_3d[mask]

         # limit rad to [-pi, pi]
-        gt_bboxes_3d[:, 6] = self.limit_period(
-            gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi)
-        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
+        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
+        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
         input_dict['gt_labels_3d'] = gt_labels_3d

         return input_dict
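After this refactor the pipeline delegates to gt_bboxes_3d.rotate/translate/scale/flip instead of slicing raw arrays. For reference, a self-contained sketch of the z-axis rotation math those calls encapsulate (the helper name is illustrative):

import numpy as np
import torch

def rotate_boxes(boxes, angle):
    # Rotate (N, 7) boxes around z: transform centers, accumulate yaw.
    rot_sin, rot_cos = np.sin(angle), np.cos(angle)
    rot_mat_T = boxes.new_tensor([[rot_cos, -rot_sin, 0],
                                  [rot_sin, rot_cos, 0],
                                  [0, 0, 1]]).T
    boxes[:, :3] = boxes[:, :3] @ rot_mat_T
    boxes[:, 6] += angle
    return boxes

boxes = torch.tensor([[1.0, 0.0, 0.0, 4.0, 1.8, 1.6, 0.0]])
print(rotate_boxes(boxes.clone(), np.pi / 2))  # center moves to (0, 1, 0)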
mmdet3d/models/dense_heads/train_mixins.py (+2 / -0)

@@ -168,6 +168,8 @@ class AnchorTrainMixin(object):
         labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
         label_weights = anchors.new_zeros(num_valid_anchors,
                                           dtype=torch.float)

         if len(gt_bboxes) > 0:
+            if not isinstance(gt_bboxes, torch.Tensor):
+                gt_bboxes = gt_bboxes.tensor.to(anchors.device)
             assign_result = bbox_assigner.assign(anchors, gt_bboxes,
                                                  gt_bboxes_ignore, gt_labels)
             sampling_result = self.bbox_sampler.sample(assign_result, anchors,
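A tiny sketch of the new unwrapping guard, with a stub standing in for a box structure:

import torch

class FakeBoxes:
    # Stand-in for BaseInstance3DBoxes, which stores data in .tensor.
    def __init__(self, t):
        self.tensor = t

gt_bboxes = FakeBoxes(torch.rand(3, 7))
if not isinstance(gt_bboxes, torch.Tensor):
    gt_bboxes = gt_bboxes.tensor  # unwrap before assignment
print(gt_bboxes.shape)  # torch.Size([3, 7])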
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py (+2 / -4)

@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module):
                     3,
                     padding=1,
                     norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down0',
-                    conv_type='SubMConv3d'))
+                    indice_key='rcnn_down0'))
         merge_conv_channel_last = channel
         down_conv_channel_last = merge_conv_channel_last

@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module):
                     3,
                     padding=1,
                     norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down1',
-                    conv_type='SubMConv3d'))
+                    indice_key='rcnn_down1'))
         down_conv_channel_last = channel
         self.conv_down.add_module(
             'merge_conv',
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py (+28 / -17)

@@ -5,7 +5,6 @@
 import torch.nn.functional as F

 from mmdet3d.core import multi_apply
 from mmdet3d.core.bbox import box_torch_ops
 from mmdet3d.models.builder import build_loss
-from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
 from mmdet.models import HEADS

@@ -14,7 +13,7 @@ class PointwiseSemanticHead(nn.Module):
     """Semantic segmentation head for point-wise segmentation.

     Predict point-wise segmentation and part regression results for PartA2.
-    See https://arxiv.org/abs/1907.03670 for more details.
+    See `paper <https://arxiv.org/abs/1907.03670>`_ for more details.

     Args:
         in_channels (int): the number of input channel.

@@ -65,28 +64,27 @@ class PointwiseSemanticHead(nn.Module):
             seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)

     def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
         """generate segmentation and part prediction targets
+        for a single sample

         Args:
             voxel_centers (torch.Tensor): shape [voxel_num, 3],
                 the center of voxels
-            gt_bboxes_3d (torch.Tensor): shape [box_num, 7], gt boxes
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt boxes containing
+                tensor of shape [box_num, 7].
             gt_labels_3d (torch.Tensor): shape [box_num], class label of gt

         Returns:
             tuple: segmentation targets with shape [voxel_num],
                 part prediction targets with shape [voxel_num, 3]
         """
-        enlarged_gt_boxes = box_torch_ops.enlarge_box3d_lidar(
-            gt_bboxes_3d, extra_width=self.extra_width)
+        gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
+        enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)

         part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
                                                dtype=torch.float32)
-        box_idx = points_in_boxes_gpu(voxel_centers.unsqueeze(0),
-                                      gt_bboxes_3d.unsqueeze(0)).squeeze(0)
-        # -1 ~ box_num
-        enlarge_box_idx = points_in_boxes_gpu(
-            voxel_centers.unsqueeze(0),
-            enlarged_gt_boxes.unsqueeze(0)).squeeze(0).long()
-        # -1 ~ box_num
+        box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)  # -1 ~ box_num
+        enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
+            voxel_centers).long()  # -1 ~ box_num

         gt_labels_pad = F.pad(
             gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)

@@ -95,24 +93,37 @@ class PointwiseSemanticHead(nn.Module):
         ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)
         seg_targets[ignore_flag] = -1

-        for k in range(gt_bboxes_3d.shape[0]):
+        for k in range(len(gt_bboxes_3d)):
             k_box_flag = box_idx == k
             # no point in current box (caused by velodyne reduce)
             if not k_box_flag.any():
                 continue
             fg_voxels = voxel_centers[k_box_flag]
-            transformed_voxels = fg_voxels - gt_bboxes_3d[k, 0:3]
+            transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
             transformed_voxels = box_torch_ops.rotation_3d_in_axis(
                 transformed_voxels.unsqueeze(0),
-                -gt_bboxes_3d[k, 6].view(1),
+                -gt_bboxes_3d.yaw[k].view(1),
                 axis=2)
-            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d[
-                k, 3:6] + voxel_centers.new_tensor([0.5, 0.5, 0])
+            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[
+                k] + voxel_centers.new_tensor([0.5, 0.5, 0])
         part_targets = torch.clamp(part_targets, min=0)
         return seg_targets, part_targets

     def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
         """generate segmentation and part prediction targets
+
+        Args:
+            voxel_centers (torch.Tensor): shape [voxel_num, 3],
+                the center of voxels
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): list of gt boxes
+                containing tensor of shape [box_num, 7].
+            gt_labels_3d (list[torch.Tensor]): list of GT labels.
+
+        Returns:
+            tuple: segmentation targets with shape [voxel_num],
+                part prediction targets with shape [voxel_num, 3]
+        """
         batch_size = len(gt_labels_3d)
         voxel_center_list = []
         for idx in range(batch_size):
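The part target maps each foreground voxel into the box's canonical frame: subtract the bottom center, rotate by -yaw, divide by the dims, then shift by [0.5, 0.5, 0] so values land in [0, 1]. A worked example for an axis-aligned box (yaw = 0, so the rotation step is skipped; numbers are made up):

import torch

bottom_center = torch.tensor([10.0, 5.0, -1.0])
dims = torch.tensor([4.0, 1.8, 1.6])
voxel = torch.tensor([11.0, 5.0, -0.2])  # a voxel center inside the box

part_target = (voxel - bottom_center) / dims + torch.tensor([0.5, 0.5, 0.0])
print(part_target)  # tensor([0.7500, 0.5000, 0.5000])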
mmdet3d/models/roi_heads/part_aggregation_roi_head.py (+5 / -5)

@@ -8,7 +8,7 @@
 from ..builder import build_head, build_roi_extractor
 from .base_3droi_head import Base3DRoIHead

-@HEADS.register_module
+@HEADS.register_module()
 class PartAggregationROIHead(Base3DRoIHead):
     """Part aggregation roi head for PartA2"""

@@ -174,7 +174,7 @@ class PartAggregationROIHead(Base3DRoIHead):
             cur_proposal_list = proposal_list[batch_idx]
             cur_boxes = cur_proposal_list['boxes_3d']
             cur_labels_3d = cur_proposal_list['labels_3d']
-            cur_gt_bboxes = gt_bboxes_3d[batch_idx]
+            cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
             cur_gt_labels = gt_labels_3d[batch_idx]

             batch_num_gts = 0

@@ -189,7 +189,7 @@ class PartAggregationROIHead(Base3DRoIHead):
                     pred_per_cls = (cur_labels_3d == i)
                     cur_assign_res = assigner.assign(
                         cur_boxes[pred_per_cls],
-                        cur_gt_bboxes[gt_per_cls],
+                        cur_gt_bboxes.tensor[gt_per_cls],
                         gt_labels=cur_gt_labels[gt_per_cls])
                     # gather assign_results in different class into one result
                     batch_num_gts += cur_assign_res.num_gts

@@ -215,11 +215,11 @@ class PartAggregationROIHead(Base3DRoIHead):
                                               batch_gt_labels)
             else:  # for single class
                 assign_result = self.bbox_assigner.assign(
-                    cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
+                    cur_boxes, cur_gt_bboxes.tensor, gt_labels=cur_gt_labels)
             # sample boxes
             sampling_result = self.bbox_sampler.sample(assign_result,
                                                        cur_boxes,
-                                                       cur_gt_bboxes,
+                                                       cur_gt_bboxes.tensor,
                                                        cur_gt_labels)
             sampling_results.append(sampling_result)
         return sampling_results
mmdet3d/models/voxel_encoders/__init__.py (+4 / -5)

-from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
-from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
-                            VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
+from .pillar_encoder import PillarFeatureNet
+from .voxel_encoder import (DynamicSimpleVFE, DynamicVFE, HardSimpleVFE,
+                            HardVFE)

 __all__ = [
-    'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
-    'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
+    'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',
+    'DynamicSimpleVFE'
 ]
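Configs that referenced the old encoder names need the new registry names. A hedged sketch of the corresponding config fragment (field layout assumed from the usual mmdetection-style configs, not taken from this commit):

voxel_encoder = dict(type='HardSimpleVFE')
# was: voxel_encoder = dict(type='VoxelFeatureExtractorV3', num_input_features=4)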
mmdet3d/models/voxel_encoders/pillar_encoder.py (+40 / -184)

@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator
 @VOXEL_ENCODERS.register_module()
 class PillarFeatureNet(nn.Module):
+    """Pillar Feature Net.
+
+    The network prepares the pillar features and performs forward pass
+    through PFNLayers.
+
+    Args:
+        in_channels (int). Number of input features,
+            either x, y, z or x, y, z, r.
+        feat_channels (list[int]). Number of features in each of the
+            N PFNLayers.
+        with_distance (bool). Whether to include Euclidean distance
+            to points.
+        voxel_size (list[float]). Size of voxels, only utilize x and y
+            size.
+        point_cloud_range (list[float]). Point cloud range, only
+            utilizes x and y min.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                  norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int). Number of input features,
-                either x, y, z or x, y, z, r.
-            use_norm (bool). Whether to include BatchNorm.
-            num_filters (list[int]). Number of features in each of the
-                N PFNLayers.
-            with_distance (bool). Whether to include Euclidean distance
-                to points.
-            voxel_size (list[float]). Size of voxels, only utilize x and y
-                size.
-            point_cloud_range (list[float>]). Point cloud range, only
-                utilize x and y min.
-        """
         super(PillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 2
+            in_channels += 2
         if with_distance:
-            num_input_features += 1
+            in_channels += 1
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
         # Create PillarFeatureNet layers
-        self.num_input_features = num_input_features
-        num_filters = [num_input_features] + list(num_filters)
+        self.in_channels = in_channels
+        feat_channels = [in_channels] + list(feat_channels)
         pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
+            if i < len(feat_channels) - 2:
                 last_layer = False
             else:
                 last_layer = True

@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module):
                 PFNLayer(
                     in_filters,
                     out_filters,
-                    use_norm,
+                    norm_cfg=norm_cfg,
                     last_layer=last_layer,
                     mode=mode))
         self.pfn_layers = nn.ModuleList(pfn_layers)

@@ -122,9 +121,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
 class DynamicPillarFeatureNet(PillarFeatureNet):

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,

@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
         """
         super(DynamicPillarFeatureNet, self).__init__(
-            num_input_features,
-            use_norm,
-            num_filters,
+            in_channels,
+            feat_channels,
             with_distance,
             with_cluster_center=with_cluster_center,
             with_voxel_center=with_voxel_center,
             voxel_size=voxel_size,
             point_cloud_range=point_cloud_range,
             norm_cfg=norm_cfg,
             mode=mode)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         pfn_layers = []
         # TODO: currently only support one PFNLayer
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)

@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
             features = torch.cat([point_feats, feat_per_point], dim=1)
         return voxel_feats, voxel_coors
-
-
-@VOXEL_ENCODERS.register_module()
-class AlignedPillarFeatureNet(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
-                 with_distance=False,
-                 with_cluster_center=True,
-                 with_voxel_center=True,
-                 voxel_size=(0.2, 0.2, 4),
-                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
-                 mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int): Number of input features, either x, y, z
-                or x, y, z, r.
-            use_norm (bool): Whether to include BatchNorm.
-            num_filters (list[int]): Number of features in each of the N
-                PFNLayers.
-            with_distance (bool): Whether to include Euclidean distance to
-                points.
-            voxel_size (list[float]): Size of voxels, only utilize x and y
-                size.
-            point_cloud_range: (list[float]): Point cloud range, only
-                utilize x and y min.
-        """
-        super(AlignedPillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
-        if with_cluster_center:
-            print('Use cluster center')
-            num_input_features += 3
-        if with_voxel_center:
-            print('Use voxel center')
-            num_input_features += 2
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        self._with_cluster_center = with_cluster_center
-        self._with_voxel_center = with_voxel_center
-        # Create PillarFeatureNet layers
-        num_filters = [num_input_features] + list(num_filters)
-        pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
-                last_layer = False
-            else:
-                last_layer = True
-            pfn_layers.append(
-                PFNLayer(
-                    in_filters,
-                    out_filters,
-                    use_norm,
-                    last_layer=last_layer,
-                    mode=mode))
-        self.pfn_layers = nn.ModuleList(pfn_layers)
-
-        # Need pillar (voxel) size and x/y offset in order to
-        # calculate pillar offset
-        self.vx = voxel_size[0]
-        self.vy = voxel_size[1]
-        self.vz = voxel_size[2]
-        self.x_offset = self.vx / 2 + point_cloud_range[0]
-        self.y_offset = self.vy / 2 + point_cloud_range[1]
-        self.z_offset = self.vz / 2 + point_cloud_range[2]
-
-    def forward(self, features, num_points, coors):
-        features_ls = [features]
-        # Find distance of x, y, and z from cluster center
-        if self._with_cluster_center:
-            points_mean = features[:, :, :3].sum(
-                dim=1, keepdim=True) / num_points.type_as(features).view(
-                    -1, 1, 1)
-            f_cluster = features[:, :, :3] - points_mean
-            features_ls.append(f_cluster)
-
-        x_distance = features[:, :, 0] - (
-            coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-            self.x_offset)
-        y_distance = features[:, :, 1] - (
-            coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-            self.y_offset)
-        z_distance = features[:, :, 2] - (
-            coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
-            self.z_offset)
-        normed_x_distance = 1 - torch.abs(x_distance / self.vx)
-        normed_y_distance = 1 - torch.abs(y_distance / self.vy)
-        normed_z_distance = 1 - torch.abs(z_distance / self.vz)
-        x_mask = torch.gt(normed_x_distance, 0).type_as(features)
-        y_mask = torch.gt(normed_y_distance, 0).type_as(features)
-        z_mask = torch.gt(normed_z_distance, 0).type_as(features)
-        nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
-        aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
-            normed_z_distance).mul(nonzero_points_mask)
-
-        # Find distance of x, y, and z from pillar center
-        if self._with_voxel_center:
-            f_center = features[:, :, :2]
-            f_center[:, :, 0] = f_center[:, :, 0] - (
-                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-                self.x_offset)
-            f_center[:, :, 1] = f_center[:, :, 1] - (
-                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-                self.y_offset)
-            features_ls.append(f_center)
-
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features_ls.append(points_dist)
-
-        # Combine together feature decorations
-        features = torch.cat(features_ls, dim=-1)
-        # The feature decorations were calculated without regard to
-        # whether pillar was empty. Need to ensure that
-        # empty pillars remain set to zeros.
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        features *= mask
-
-        for pfn in self.pfn_layers:
-            if pfn.last_vfe:
-                features = pfn(features, aligned_distance)
-            else:
-                features = pfn(features)
-
-        return features.squeeze()
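Under the renamed arguments, a PillarFeatureNet config fragment would look roughly like this (values are the constructor defaults shown above, not taken from any shipped config):

voxel_encoder = dict(
    type='PillarFeatureNet',
    in_channels=4,       # x, y, z, reflectance
    feat_channels=[64],
    with_distance=False,
    voxel_size=(0.2, 0.2, 4),
    point_cloud_range=(0, -40, -3, 70.4, 40, 1))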
mmdet3d/models/voxel_encoders/utils.py (+13 / -31)

@@ -4,28 +4,15 @@
 from torch import nn
 from torch.nn import functional as F

-class Empty(nn.Module):
-
-    def __init__(self, *args, **kwargs):
-        super(Empty, self).__init__()
-
-    def forward(self, *args, **kwargs):
-        if len(args) == 1:
-            return args[0]
-        elif len(args) == 0:
-            return None
-        return args
-

 def get_paddings_indicator(actual_num, max_num, axis=0):
     """Create boolean mask by actual number of a padded tensor.

     Args:
-        actual_num ([type]): [description]
-        max_num ([type]): [description]
+        actual_num (torch.Tensor): Actual number of points in each voxel.
+        max_num (int): Max number of points in each voxel

     Returns:
-        [type]: [description]
+        torch.Tensor: Mask indicates which points are valid inside a voxel.
     """
     actual_num = torch.unsqueeze(actual_num, axis + 1)
     # tiled_actual_num: [N, M, 1]

@@ -52,13 +39,9 @@ class VFELayer(nn.Module):
         self.cat_max = cat_max
         self.max_out = max_out
         # self.units = int(out_channels / 2)

-        if norm_cfg:
-            norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
-            self.norm = norm_layer
-            self.linear = nn.Linear(in_channels, out_channels, bias=False)
-        else:
-            self.norm = Empty(out_channels)
-            self.linear = nn.Linear(in_channels, out_channels, bias=True)
+        self.norm = build_norm_layer(norm_cfg, out_channels)[1]
+        self.linear = nn.Linear(in_channels, out_channels, bias=False)

     def forward(self, inputs):
         # [K, T, 7] tensordot [7, units] = [K, T, units]

@@ -89,7 +72,7 @@ class PFNLayer(nn.Module):
     def __init__(self,
                  in_channels,
                  out_channels,
-                 use_norm=True,
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  last_layer=False,
                  mode='max'):
         """ Pillar Feature Net Layer.

@@ -100,9 +83,11 @@ class PFNLayer(nn.Module):
         Args:
             in_channels (int): Number of input channels.
             out_channels (int): Number of output channels.
-            use_norm (bool): Whether to include BatchNorm.
+            norm_cfg (dict): Config dict of normalization layers
             last_layer (bool): If last_layer, there is no concatenation of
                 features.
+            mode (str): Pooling model to gather features inside voxels.
+                Default to 'max'.
         """
         super().__init__()

@@ -112,13 +97,10 @@ class PFNLayer(nn.Module):
             out_channels = out_channels // 2
         self.units = out_channels

-        if use_norm:
-            self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
-            self.linear = nn.Linear(in_channels, self.units, bias=False)
-        else:
-            self.norm = Empty(self.unints)
-            self.linear = nn.Linear(in_channels, self.units, bias=True)
+        self.norm = build_norm_layer(norm_cfg, self.units)[1]
+        self.linear = nn.Linear(in_channels, self.units, bias=False)

         assert mode in ['max', 'avg']
         self.mode = mode

     def forward(self, inputs, num_voxels=None, aligned_distance=None):
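The corrected docstring now states what get_paddings_indicator returns. A worked example of the underlying comparison, re-implemented inline so it runs standalone:

import torch

# Voxels hold at most 4 points; actual counts are [2, 4, 1].
actual_num = torch.tensor([2, 4, 1]).unsqueeze(1)  # (N, 1)
slots = torch.arange(4).view(1, -1)                # (1, max_num)
mask = slots < actual_num                          # (N, max_num) validity mask
print(mask)
# tensor([[ True,  True, False, False],
#         [ True,  True,  True,  True],
#         [ True, False, False, False]])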
mmdet3d/models/voxel_encoders/voxel_encoder.py
View file @
f2b01720
import
torch
from
mmcv.cnn
import
build_norm_layer
from
torch
import
nn
from
torch.nn
import
functional
as
F
from
mmdet3d.ops
import
DynamicScatter
from
..
import
builder
from
..registry
import
VOXEL_ENCODERS
from
.utils
import
Empty
,
VFELayer
,
get_paddings_indicator
from
.utils
import
VFELayer
,
get_paddings_indicator
@
VOXEL_ENCODERS
.
register_module
()
class
VoxelFeatureExtractor
(
nn
.
Module
):
class
HardSimpleVFE
(
nn
.
Module
):
"""Simple voxel feature encoder used in SECOND
def
__init__
(
self
,
num_input_features
=
4
,
use_norm
=
True
,
num_filters
=
[
32
,
128
],
with_distance
=
False
,
name
=
'VoxelFeatureExtractor'
):
super
(
VoxelFeatureExtractor
,
self
).
__init__
()
self
.
name
=
name
assert
len
(
num_filters
)
==
2
num_input_features
+=
3
# add mean features
if
with_distance
:
num_input_features
+=
1
self
.
_with_distance
=
with_distance
self
.
vfe1
=
VFELayer
(
num_input_features
,
num_filters
[
0
],
use_norm
)
self
.
vfe2
=
VFELayer
(
num_filters
[
0
],
num_filters
[
1
],
use_norm
)
if
use_norm
:
self
.
linear
=
nn
.
Linear
(
num_filters
[
1
],
num_filters
[
1
],
bias
=
False
)
self
.
norm
=
nn
.
BatchNorm1d
(
num_filters
[
1
],
eps
=
1e-3
,
momentum
=
0.01
)
else
:
self
.
linear
=
nn
.
Linear
(
num_filters
[
1
],
num_filters
[
1
],
bias
=
True
)
self
.
norm
=
Empty
(
num_filters
[
1
])
def
forward
(
self
,
features
,
num_voxels
,
**
kwargs
):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
# t = time.time()
# torch.cuda.synchronize()
points_mean
=
features
[:,
:,
:
3
].
sum
(
dim
=
1
,
keepdim
=
True
)
/
num_voxels
.
type_as
(
features
).
view
(
-
1
,
1
,
1
)
features_relative
=
features
[:,
:,
:
3
]
-
points_mean
if
self
.
_with_distance
:
points_dist
=
torch
.
norm
(
features
[:,
:,
:
3
],
2
,
2
,
keepdim
=
True
)
features
=
torch
.
cat
([
features
,
features_relative
,
points_dist
],
dim
=-
1
)
else
:
features
=
torch
.
cat
([
features
,
features_relative
],
dim
=-
1
)
voxel_count
=
features
.
shape
[
1
]
mask
=
get_paddings_indicator
(
num_voxels
,
voxel_count
,
axis
=
0
)
mask
=
torch
.
unsqueeze
(
mask
,
-
1
).
type_as
(
features
)
# mask = features.max(dim=2, keepdim=True)[0] != 0
# torch.cuda.synchronize()
# print("vfe prep forward time", time.time() - t)
x
=
self
.
vfe1
(
features
)
x
*=
mask
x
=
self
.
vfe2
(
x
)
x
*=
mask
x
=
self
.
linear
(
x
)
x
=
self
.
norm
(
x
.
permute
(
0
,
2
,
1
).
contiguous
()).
permute
(
0
,
2
,
1
).
contiguous
()
x
=
F
.
relu
(
x
)
x
*=
mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise
=
torch
.
max
(
x
,
dim
=
1
)[
0
]
return
voxelwise
It simply averages the values of points in a voxel.
"""
@
VOXEL_ENCODERS
.
register_module
()
class
VoxelFeatureExtractorV2
(
nn
.
Module
):
def
__init__
(
self
,
num_input_features
=
4
,
use_norm
=
True
,
num_filters
=
[
32
,
128
],
with_distance
=
False
,
name
=
'VoxelFeatureExtractor'
):
super
(
VoxelFeatureExtractorV2
,
self
).
__init__
()
self
.
name
=
name
assert
len
(
num_filters
)
>
0
num_input_features
+=
3
if
with_distance
:
num_input_features
+=
1
self
.
_with_distance
=
with_distance
num_filters
=
[
num_input_features
]
+
num_filters
filters_pairs
=
[[
num_filters
[
i
],
num_filters
[
i
+
1
]]
for
i
in
range
(
len
(
num_filters
)
-
1
)]
self
.
vfe_layers
=
nn
.
ModuleList
(
[
VFELayer
(
i
,
o
,
use_norm
)
for
i
,
o
in
filters_pairs
])
if
use_norm
:
self
.
linear
=
nn
.
Linear
(
num_filters
[
-
1
],
num_filters
[
-
1
],
bias
=
False
)
self
.
norm
=
nn
.
BatchNorm1d
(
num_filters
[
-
1
],
eps
=
1e-3
,
momentum
=
0.01
)
else
:
self
.
linear
=
nn
.
Linear
(
num_filters
[
-
1
],
num_filters
[
-
1
],
bias
=
True
)
self
.
norm
=
Empty
(
num_filters
[
-
1
])
def
forward
(
self
,
features
,
num_voxels
,
**
kwargs
):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
points_mean
=
features
[:,
:,
:
3
].
sum
(
dim
=
1
,
keepdim
=
True
)
/
num_voxels
.
type_as
(
features
).
view
(
-
1
,
1
,
1
)
features_relative
=
features
[:,
:,
:
3
]
-
points_mean
if
self
.
_with_distance
:
points_dist
=
torch
.
norm
(
features
[:,
:,
:
3
],
2
,
2
,
keepdim
=
True
)
features
=
torch
.
cat
([
features
,
features_relative
,
points_dist
],
dim
=-
1
)
else
:
features
=
torch
.
cat
([
features
,
features_relative
],
dim
=-
1
)
voxel_count
=
features
.
shape
[
1
]
mask
=
get_paddings_indicator
(
num_voxels
,
voxel_count
,
axis
=
0
)
mask
=
torch
.
unsqueeze
(
mask
,
-
1
).
type_as
(
features
)
for
vfe
in
self
.
vfe_layers
:
features
=
vfe
(
features
)
features
*=
mask
features
=
self
.
linear
(
features
)
features
=
self
.
norm
(
features
.
permute
(
0
,
2
,
1
).
contiguous
()).
permute
(
0
,
2
,
1
).
contiguous
()
features
=
F
.
relu
(
features
)
features
*=
mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise
=
torch
.
max
(
features
,
dim
=
1
)[
0
]
return
voxelwise
@
VOXEL_ENCODERS
.
register_module
()
class
VoxelFeatureExtractorV3
(
nn
.
Module
):
def
__init__
(
self
,
num_input_features
=
4
,
use_norm
=
True
,
num_filters
=
[
32
,
128
],
with_distance
=
False
,
name
=
'VoxelFeatureExtractor'
):
super
(
VoxelFeatureExtractorV3
,
self
).
__init__
()
self
.
name
=
name
def
__init__
(
self
):
super
(
HardSimpleVFE
,
self
).
__init__
()
def
forward
(
self
,
features
,
num_points
,
coors
):
# features: [concated_num_points, num_voxel_size, 3(4)]
...
...
@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module):
@
VOXEL_ENCODERS
.
register_module
()
class
DynamicVFEV3
(
nn
.
Module
):
class
DynamicSimpleVFE
(
nn
.
Module
):
"""Simple dynamic voxel feature encoder used in DV-SECOND
It simply averages the values of points in a voxel.
But the number of points in a voxel is dynamic and varies.
Args:
voxel_size (tupe[float]): Size of a single voxel
point_cloud_range (tuple[float]): Range of the point cloud and voxels
"""
def
__init__
(
self
,
num_input_features
=
4
,
voxel_size
=
(
0.2
,
0.2
,
4
),
point_cloud_range
=
(
0
,
-
40
,
-
3
,
70.4
,
40
,
1
)):
super
(
DynamicVFE
V3
,
self
).
__init__
()
super
(
Dynamic
Simple
VFE
,
self
).
__init__
()
self
.
scatter
=
DynamicScatter
(
voxel_size
,
point_cloud_range
,
True
)
@
torch
.
no_grad
()
...
...
@@ -172,10 +54,37 @@ class DynamicVFEV3(nn.Module):
@
VOXEL_ENCODERS
.
register_module
()
class
DynamicVFE
(
nn
.
Module
):
"""Dynamic Voxel feature encoder used in DV-SECOND
It encodes features of voxels and their points. It could also fuse
image feature into voxel features in a point-wise manner.
The number of points inside the voxel varies.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to center of
voxel for each points inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
points. Default to False.
"""
def
__init__
(
self
,
num_input_feature
s
=
4
,
num_filter
s
=
[],
in_channel
s
=
4
,
feat_channel
s
=
[],
with_distance
=
False
,
with_cluster_center
=
False
,
with_voxel_center
=
False
,
...
...
@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module):
                 fusion_layer=None,
                 return_point_feats=False):
        super(DynamicVFE, self).__init__()
-        assert len(num_filters) > 0
        assert mode in ['avg', 'max']
+        assert len(feat_channels) > 0
        if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
        if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
        if with_distance:
-            num_input_features += 3
-        self.num_input_features = num_input_features
+            in_channels += 3
+        self.in_channels = in_channels
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center
...
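A worked example of the channel bookkeeping above, assuming raw KITTI-style
points with 4 channels (x, y, z, reflectance; the channel names are an
assumption for illustration):

    in_channels = 4
    in_channels += 3  # with_cluster_center: (dx, dy, dz) to the cluster mean
    in_channels += 3  # with_voxel_center: (dx, dy, dz) to the voxel center
    assert in_channels == 10  # the first VFE layer then consumes 10 channels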
@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module):
        self.point_cloud_range = point_cloud_range
        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
        vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
            if i > 0:
                in_filters *= 2
            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
...
@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module):
            self.fusion_layer = builder.build_fusion_layer(fusion_layer)

    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
+        """Map voxel features to their corresponding points.
+        Args:
+            pts_coors (torch.Tensor): Voxel coordinate of each point.
+            voxel_mean (torch.Tensor): Voxel features to be mapped.
+            voxel_coors (torch.Tensor): Coordinates of the valid voxels.
+        Returns:
+            torch.Tensor: Features or centers of each point.
+        """
        # Step 1: scatter voxel into canvas
        # Calculate necessary things for canvas creation
        canvas_z = int(
...
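A pure-PyTorch sketch of the canvas trick this method uses, under simplified
assumptions (a single sample and 1-D voxel indices instead of the real
(batch, z, y, x) coordinates); `canvas_len` and the small tensors below are
hypothetical stand-ins for the method's arguments:

    import torch

    canvas_len = 10                                   # number of voxel slots
    voxel_coors = torch.tensor([2, 5, 7])             # indices of valid voxels
    voxel_mean = torch.tensor([[1.0], [2.0], [3.0]])  # (M, C) voxel features
    pts_coors = torch.tensor([5, 2, 5, 7])            # voxel index per point

    # Step 1: scatter the voxel features into a dense canvas indexed by coors
    canvas = voxel_mean.new_zeros(canvas_len, voxel_mean.size(1))
    canvas[voxel_coors] = voxel_mean
    # Step 2: gather a feature for every point by indexing the canvas
    point_feats = canvas[pts_coors]   # -> [[2.], [1.], [2.], [3.]]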
@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module):
                points=None,
                img_feats=None,
                img_meta=None):
-        """
-        features (torch.Tensor): NxC
-        coors (torch.Tensor): Nx(1+NDim)
-        """
+        """Forward function.
+        Args:
+            features (torch.Tensor): Features of voxels, shape is NxC.
+            coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
+            points (list[torch.Tensor], optional): Raw points used to guide
+                the multi-modality fusion. Defaults to None.
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+        Returns:
+            tuple: If `return_point_feats` is False, returns the voxel
+                features and their coordinates. If `return_point_feats` is
+                True, returns the features of each point inside the voxels.
+        """
        features_ls = [features]
        # Find distance of x, y, and z from cluster center
...
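A hedged usage sketch for the renamed interface. The import path, shapes, and
the GPU requirement are assumptions drawn from this diff (DynamicScatter is a
CUDA op, so this only runs on a CUDA build):

    import torch
    from mmdet3d.models.voxel_encoders import DynamicVFE  # assumed path

    vfe = DynamicVFE(in_channels=4, feat_channels=[64]).cuda()
    features = torch.rand(100, 4).cuda()                 # N points: (x, y, z, r)
    coors = torch.randint(0, 10, (100, 4)).int().cuda()  # (batch_idx, z, y, x)
    voxel_feats, voxel_coors = vfe(features, coors)      # dynamic voxelization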
@@ -320,10 +252,36 @@ class DynamicVFE(nn.Module):
@VOXEL_ENCODERS.register_module()
class HardVFE(nn.Module):
+    """Voxel feature encoder used in DV-SECOND.
+    It encodes the features of voxels and their points. It can also fuse
+    image features into voxel features in a point-wise manner.
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Defaults to False.
+        with_cluster_center (bool): Whether to use the distance to the cluster
+            center of the points inside a voxel. Defaults to False.
+        with_voxel_center (bool): Whether to use the distance to the voxel
+            center for each point inside a voxel. Defaults to False.
+        voxel_size (tuple[float]): Size of a single voxel. Defaults to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Defaults to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode used when pooling the features of the points
+            inside a voxel. Available options are 'max' and 'avg'.
+            Defaults to 'max'.
+        fusion_layer (dict | None): The config dict of the fusion layer used
+            in multi-modal detectors. Defaults to None.
+        return_point_feats (bool): Whether to return the features of each
+            point. Defaults to False.
+    """

    def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                 with_distance=False,
                 with_cluster_center=False,
                 with_voxel_center=False,
...
@@ -334,14 +292,14 @@ class HardVFE(nn.Module):
                 fusion_layer=None,
                 return_point_feats=False):
        super(HardVFE, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
        if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
        if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
        if with_distance:
-            num_input_features += 3
-        self.num_input_features = num_input_features
+            in_channels += 3
+        self.in_channels = in_channels
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center
...
@@ -357,16 +315,16 @@ class HardVFE(nn.Module):
        self.point_cloud_range = point_cloud_range
        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
        vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
            if i > 0:
                in_filters *= 2
            # TODO: pass norm_cfg to VFE
            # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
-            if i == (len(num_filters) - 2):
+            if i == (len(feat_channels) - 2):
                cat_max = False
                max_out = True
                if fusion_layer:
...
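The `in_filters *= 2` above reflects how intermediate VFE layers behave: each
layer concatenates the per-voxel max feature back onto every point feature, so
the next layer sees twice the channels. A toy illustration of that doubling
(all shapes below are made up for the example):

    import torch

    point_feats = torch.rand(5, 16, 64)                # (voxels, points, C)
    voxel_max = point_feats.max(dim=1, keepdim=True)[0]
    concat = torch.cat([point_feats, voxel_max.expand(-1, 16, -1)], dim=-1)
    assert concat.shape[-1] == 128                     # next layer sees 2x C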
@@ -394,9 +352,20 @@ class HardVFE(nn.Module):
                coors,
                img_feats=None,
                img_meta=None):
-        """
-        features (torch.Tensor): NxMxC
-        coors (torch.Tensor): Nx(1+NDim)
-        """
+        """Forward function.
+        Args:
+            features (torch.Tensor): Features of voxels, shape is MxNxC.
+            num_points (torch.Tensor): Number of points in each voxel.
+            coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+        Returns:
+            tuple: If `return_point_feats` is False, returns the voxel
+                features and their coordinates. If `return_point_feats` is
+                True, returns the features of each point inside the voxels.
+        """
        features_ls = [features]
        # Find distance of x, y, and z from cluster center
...
@@ -438,19 +407,29 @@ class HardVFE(nn.Module):
        for i, vfe in enumerate(self.vfe_layers):
            voxel_feats = vfe(voxel_feats)
-            if torch.isnan(voxel_feats).any():
-                import pdb
-                pdb.set_trace()

        if (self.fusion_layer is not None and img_feats is not None):
            voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
                                                coors, img_feats, img_meta)
-            if torch.isnan(voxel_feats).any():
-                import pdb
-                pdb.set_trace()

        return voxel_feats

    def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
                         img_meta):
+        """Fuse image and point features with mask.
+        Args:
+            features (torch.Tensor): Features of voxels, usually the values
+                of the points in the voxels.
+            mask (torch.Tensor): Mask indicating the valid features in each
+                voxel.
+            voxel_feats (torch.Tensor): Features of voxels.
+            coors (torch.Tensor): Coordinates of each single voxel.
+            img_feats (list[torch.Tensor]): Multi-scale feature maps of image.
+            img_meta (list(dict)): Meta information of image and points.
+        Returns:
+            torch.Tensor: Fused features of each voxel.
+        """
        # the features consist of a batch of points
        batch_size = coors[-1, 0] + 1
        points = []
...
@@ -459,20 +438,13 @@ class HardVFE(nn.Module):
            points.append(features[single_mask][mask[single_mask]])

        point_feats = voxel_feats[mask]
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()
        point_feats = self.fusion_layer(img_feats, points, point_feats,
                                        img_meta)
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()

        voxel_canvas = voxel_feats.new_zeros(
            size=(voxel_feats.size(0), voxel_feats.size(1),
                  point_feats.size(-1)))
        voxel_canvas[mask] = point_feats
        out = torch.max(voxel_canvas, dim=1)[0]
-        if torch.isnan(out).any():
-            import pdb
-            pdb.set_trace()
        return out
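The canvas-and-max ending above is the core of the fusion: fused per-point
features are scattered back onto a dense (voxel, point) canvas with the
validity mask, then max-pooled over the point axis. A self-contained sketch
with toy shapes (all names below are illustrative, not the module's API):

    import torch

    num_voxels, max_points, feat_dim = 3, 4, 2
    mask = torch.tensor([[1, 1, 0, 0],
                         [1, 0, 0, 0],
                         [1, 1, 1, 0]], dtype=torch.bool)  # valid points
    point_feats = torch.rand(int(mask.sum()), feat_dim)    # fused features

    voxel_canvas = point_feats.new_zeros(num_voxels, max_points, feat_dim)
    voxel_canvas[mask] = point_feats
    out = torch.max(voxel_canvas, dim=1)[0]                # one feature/voxel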
tests/test_box3d.py  View file @ f2b01720
...
@@ -3,7 +3,7 @@ import pytest
import torch

from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
-                               LiDARInstance3DBoxes)
+                               DepthInstance3DBoxes, LiDARInstance3DBoxes)


def test_lidar_boxes3d():
...
@@ -13,6 +13,46 @@ def test_lidar_boxes3d():
    assert boxes.tensor.shape[0] == 0
    assert boxes.tensor.shape[1] == 7

    # Test init with origin
    gravity_center_box = np.array(
        [[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,
          4.40900000e+00, 1.54800000e+00, -1.48801203e+00],
         [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,
          4.58000000e-01, 7.82000000e-01, -4.62759755e+00],
         [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,
          3.96900000e+00, 1.73200000e+00, -4.65203216e+00],
         [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,
          3.85700000e+00, 1.72300000e+00, -2.81427027e+00]],
        dtype=np.float32)
    bottom_center_box = LiDARInstance3DBoxes(
        gravity_center_box, origin=[0.5, 0.5, 0.5])
    expected_tensor = torch.tensor(
        [[-5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,
          4.40900000e+00, 1.54800000e+00, -1.48801203e+00],
         [-2.66751588e+01, 5.59499564e+00, -1.30534586e+00, 3.43000000e-01,
          4.58000000e-01, 7.82000000e-01, -4.62759755e+00],
         [-5.80979675e+00, 3.54092357e+01, -6.65110112e-01, 2.39600000e+00,
          3.96900000e+00, 1.73200000e+00, -4.65203216e+00],
         [-3.13086877e+01, 1.09007628e+00, -1.05611211e+00, 1.94400000e+00,
          3.85700000e+00, 1.72300000e+00, -2.81427027e+00]])
    assert torch.allclose(expected_tensor, bottom_center_box.tensor)

    # Test init with numpy array
    np_boxes = np.array(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
...
@@ -70,9 +110,19 @@ def test_lidar_boxes3d():
         [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
         [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
         [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
-    boxes.flip()
+    boxes.flip('horizontal')
    assert torch.allclose(boxes.tensor, expected_tensor)

+    expected_tensor = torch.tensor(
+        [[-1.7802, -2.5162, -1.7501, 1.7500, 3.3900, 1.6500, -1.6616],
+         [-8.9594, -2.4567, -1.6357, 1.5400, 4.0100, 1.5700, -1.5216],
+         [-28.2967, 0.5558, -1.3033, 1.4700, 2.2300, 1.4800, -4.7116],
+         [-26.6690, -21.8230, -1.7361, 1.5600, 3.4800, 1.4000, -4.8316],
+         [-31.3198, -8.1621, -1.6218, 1.7400, 3.7700, 1.4800, -0.3516]])
+    boxes_flip_vert = boxes.clone()
+    boxes_flip_vert.flip('vertical')
+    assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)

    # test box rotation
    expected_tensor = torch.tensor(
        [[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
...
[
27.3398
,
-
18.3976
,
29.0896
,
-
14.6065
]])
# the pytorch print loses some precision
assert
torch
.
allclose
(
boxes
.
near
s
et_bev
,
expected_tensor
,
rtol
=
1e-4
,
atol
=
1e-7
)
boxes
.
neare
s
t_bev
,
expected_tensor
,
rtol
=
1e-4
,
atol
=
1e-7
)
# obtained by the print of the original implementation
expected_tensor
=
torch
.
tensor
([[[
2.4093e+00
,
-
4.4784e+00
,
-
1.9169e+00
],
...
...
@@ -269,6 +319,25 @@ def test_lidar_boxes3d():
# the pytorch print loses some precision
assert
torch
.
allclose
(
boxes
.
corners
,
expected_tensor
,
rtol
=
1e-4
,
atol
=
1e-7
)
# test new_box
new_box1
=
boxes
.
new_box
([[
1
,
2
,
3
,
4
,
5
,
6
,
7
]])
assert
torch
.
allclose
(
new_box1
.
tensor
,
torch
.
tensor
([[
1
,
2
,
3
,
4
,
5
,
6
,
7
]],
dtype
=
boxes
.
tensor
.
dtype
))
assert
new_box1
.
device
==
boxes
.
device
assert
new_box1
.
with_yaw
==
boxes
.
with_yaw
assert
new_box1
.
box_dim
==
boxes
.
box_dim
new_box2
=
boxes
.
new_box
(
np
.
array
([[
1
,
2
,
3
,
4
,
5
,
6
,
7
]]))
assert
torch
.
allclose
(
new_box2
.
tensor
,
torch
.
tensor
([[
1
,
2
,
3
,
4
,
5
,
6
,
7
]],
dtype
=
boxes
.
tensor
.
dtype
))
new_box3
=
boxes
.
new_box
(
torch
.
tensor
([[
1
,
2
,
3
,
4
,
5
,
6
,
7
]]))
assert
torch
.
allclose
(
new_box3
.
tensor
,
torch
.
tensor
([[
1
,
2
,
3
,
4
,
5
,
6
,
7
]],
dtype
=
boxes
.
tensor
.
dtype
))
def
test_boxes_conversion
():
"""Test the conversion of boxes between different modes.
...
...
@@ -284,6 +353,8 @@ def test_boxes_conversion():
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
                                       Box3DMode.CAM)
+    expected_box = lidar_boxes.convert_to(Box3DMode.CAM)
+    assert torch.equal(expected_box.tensor, cam_box_tensor)

    # Some properties should be the same
    cam_boxes = CameraInstance3DBoxes(cam_box_tensor)
...
@@ -310,16 +381,10 @@ def test_boxes_conversion():
                                               Box3DMode.DEPTH, Box3DMode.CAM)
    assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)

-    # test error raise with not supported conversion
-    with pytest.raises(NotImplementedError):
-        Box3DMode.convert(lidar_box_tensor, Box3DMode.LIDAR, Box3DMode.DEPTH)
-    with pytest.raises(NotImplementedError):
-        Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)

    # test similar mode conversion
    same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
                                     Box3DMode.DEPTH)
-    assert (same_results == depth_box_tensor).all()
+    assert torch.equal(same_results, depth_box_tensor)

    # test conversion with a given rt_mat
    camera_boxes = CameraInstance3DBoxes(
...
rt_mat
.
inverse
().
numpy
())
assert
np
.
allclose
(
np
.
array
(
cam_to_lidar_box
),
expected_tensor
[
0
].
numpy
())
# test convert from depth to lidar
depth_boxes
=
torch
.
tensor
(
[[
2.4593
,
2.5870
,
-
0.4321
,
0.8597
,
0.6193
,
1.0204
,
3.0693
],
[
1.4856
,
2.5299
,
-
0.5570
,
0.9385
,
2.1404
,
0.8954
,
3.0601
]],
dtype
=
torch
.
float32
)
depth_boxes
=
DepthInstance3DBoxes
(
depth_boxes
)
depth_to_lidar_box
=
depth_boxes
.
convert_to
(
Box3DMode
.
LIDAR
)
expected_box
=
depth_to_lidar_box
.
convert_to
(
Box3DMode
.
DEPTH
)
assert
torch
.
equal
(
depth_boxes
.
tensor
,
expected_box
.
tensor
)
lidar_to_depth_box
=
Box3DMode
.
convert
(
depth_to_lidar_box
,
Box3DMode
.
LIDAR
,
Box3DMode
.
DEPTH
)
assert
torch
.
allclose
(
depth_boxes
.
tensor
,
lidar_to_depth_box
.
tensor
)
assert
torch
.
allclose
(
depth_boxes
.
volume
,
lidar_to_depth_box
.
volume
)
# test convert from depth to camera
depth_to_cam_box
=
Box3DMode
.
convert
(
depth_boxes
,
Box3DMode
.
DEPTH
,
Box3DMode
.
CAM
)
cam_to_depth_box
=
Box3DMode
.
convert
(
depth_to_cam_box
,
Box3DMode
.
CAM
,
Box3DMode
.
DEPTH
)
expected_tensor
=
depth_to_cam_box
.
convert_to
(
Box3DMode
.
DEPTH
)
assert
torch
.
equal
(
expected_tensor
.
tensor
,
cam_to_depth_box
.
tensor
)
assert
torch
.
allclose
(
depth_boxes
.
tensor
,
cam_to_depth_box
.
tensor
)
assert
torch
.
allclose
(
depth_boxes
.
volume
,
cam_to_depth_box
.
volume
)
with
pytest
.
raises
(
NotImplementedError
):
# assert invalid convert mode
Box3DMode
.
convert
(
depth_boxes
,
Box3DMode
.
DEPTH
,
3
)
def
test_camera_boxes3d
():
# Test init with numpy array
...
...
@@ -449,9 +543,19 @@ def test_camera_boxes3d():
[
26.66902
,
-
21.82302
,
-
1.736057
,
1.56
,
3.48
,
1.4
,
4.8315926
],
[
31.31978
,
-
8.162144
,
-
1.6217787
,
1.74
,
3.77
,
1.48
,
0.35159278
]]),
Box3DMode
.
LIDAR
,
Box3DMode
.
CAM
)
boxes
.
flip
()
boxes
.
flip
(
'horizontal'
)
assert
torch
.
allclose
(
boxes
.
tensor
,
expected_tensor
)
expected_tensor
=
torch
.
tensor
(
[[
2.5162
,
1.7501
,
-
1.7802
,
3.3900
,
1.6500
,
1.7500
,
-
1.6616
],
[
2.4567
,
1.6357
,
-
8.9594
,
4.0100
,
1.5700
,
1.5400
,
-
1.5216
],
[
-
0.5558
,
1.3033
,
-
28.2967
,
2.2300
,
1.4800
,
1.4700
,
-
4.7116
],
[
21.8230
,
1.7361
,
-
26.6690
,
3.4800
,
1.4000
,
1.5600
,
-
4.8316
],
[
8.1621
,
1.6218
,
-
31.3198
,
3.7700
,
1.4800
,
1.7400
,
-
0.3516
]])
boxes_flip_vert
=
boxes
.
clone
()
boxes_flip_vert
.
flip
(
'vertical'
)
assert
torch
.
allclose
(
boxes_flip_vert
.
tensor
,
expected_tensor
,
1e-4
)
# test box rotation
expected_tensor
=
Box3DMode
.
convert
(
torch
.
tensor
(
...
...
@@ -560,7 +664,7 @@ def test_camera_boxes3d():
    expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
    # the pytorch print loses some precision
    assert torch.allclose(
-        boxes.nearset_bev, expected_tensor, rtol=1e-4, atol=1e-7)
+        boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)

    # obtained by the print of the original implementation
    expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],
...
@@ -659,3 +763,130 @@ def test_boxes3d_overlaps():
        cam_boxes1.overlaps(cam_boxes1, boxes1)
    with pytest.raises(AssertionError):
        boxes1.overlaps(cam_boxes1, boxes1)


def test_depth_boxes3d():
    # test empty initialization
    empty_boxes = []
    boxes = DepthInstance3DBoxes(empty_boxes)
    assert boxes.tensor.shape[0] == 0
    assert boxes.tensor.shape[1] == 7

    # Test init with numpy array
    np_boxes = np.array(
        [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
         [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971]],
        dtype=np.float32)
    boxes_1 = DepthInstance3DBoxes(np_boxes)
    assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))

    # test properties
    assert boxes_1.volume.size(0) == 2
    assert (boxes_1.center == boxes_1.bottom_center).all()
    expected_tensor = torch.tensor([[1.4856, 2.5299, -0.1093],
                                    [2.3262, 3.3065, 0.9475]])
    assert torch.allclose(boxes_1.gravity_center, expected_tensor)
    expected_tensor = torch.tensor([[1.4856, 2.5299, 0.9385, 2.1404, 3.0601],
                                    [2.3262, 3.3065, 0.8234, 0.5325, 2.9971]])
    assert torch.allclose(boxes_1.bev, expected_tensor)
    expected_tensor = torch.tensor([[1.0164, 1.4597, 1.9548, 3.6001],
                                    [1.9145, 3.0402, 2.7379, 3.5728]])
    assert torch.allclose(boxes_1.nearest_bev, expected_tensor, 1e-4)
    assert repr(boxes) == (
        'DepthInstance3DBoxes(\n    tensor([], size=(0, 7)))')

    # test init with torch.Tensor
    th_boxes = torch.tensor(
        [[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
        dtype=torch.float32)
    boxes_2 = DepthInstance3DBoxes(th_boxes)
    assert torch.allclose(boxes_2.tensor, th_boxes)

    # test clone/to/device
    boxes_2 = boxes_2.clone()
    boxes_1 = boxes_1.to(boxes_2.device)

    # test box concatenation
    expected_tensor = torch.tensor(
        [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
         [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971],
         [2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]])
    boxes = DepthInstance3DBoxes.cat([boxes_1, boxes_2])
    assert torch.allclose(boxes.tensor, expected_tensor)
    # concatenate empty list
    empty_boxes = DepthInstance3DBoxes.cat([])
    assert empty_boxes.tensor.shape[0] == 0
    assert empty_boxes.tensor.shape[-1] == 7

    # test box flip
    expected_tensor = torch.tensor(
        [[-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815],
         [-2.3262, 3.3065, 0.4426, 0.8234, 0.5325, 1.0099, 0.1445],
         [-2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 0.0723],
         [-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815]])
    boxes.flip(bev_direction='horizontal')
    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
    expected_tensor = torch.tensor(
        [[-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815],
         [-2.3262, -3.3065, 0.4426, 0.8234, 0.5325, 1.0099, -0.1445],
         [-2.4593, -2.5870, -0.4321, 0.8597, 0.6193, 1.0204, -0.0723],
         [-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815]])
    boxes.flip(bev_direction='vertical')
    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)

    # test box rotation
    boxes_rot = boxes.clone()
    expected_tensor = torch.tensor(
        [[-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355],
         [-2.4758, -3.1960, 0.4426, 0.8234, 0.5325, 1.0099, -0.0985],
         [-2.5757, -2.4712, -0.4321, 0.8597, 0.6193, 1.0204, -0.0263],
         [-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355]])
    boxes_rot.rotate(-0.04599790655000615)
    assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3)

    th_boxes = torch.tensor(
        [[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195,
          0.27956772],
         [1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509,
          1.8919173]],
        dtype=torch.float32)
    boxes = DepthInstance3DBoxes(th_boxes, box_dim=6, with_yaw=False)
    expected_tensor = torch.tensor(
        [[0.64884546, 0.78390356, 0.10563634, 1.50373348, 0.23795205,
          0.27956772, 0],
         [1.45139421, 0.43169443, 0.93829232, 0.11967964, 0.93380373,
          1.89191735, 0]])
    boxes_3 = boxes.clone()
    boxes_3.rotate(-0.04599790655000615)
    assert torch.allclose(boxes_3.tensor, expected_tensor)
    boxes.rotate(torch.tensor(-0.04599790655000615))
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test bbox in_range_bev
    expected_tensor = torch.tensor([1, 1], dtype=torch.bool)
    mask = boxes.in_range_bev([0., -40., 70.4, 40.])
    assert (mask == expected_tensor).all()
    mask = boxes.nonempty()
    assert (mask == expected_tensor).all()

    expected_tensor = torch.tensor([[[-0.1030, 0.6649, 0.1056],
                                     [-0.1030, 0.6649, 0.3852],
                                     [-0.1030, 0.9029, 0.3852],
                                     [-0.1030, 0.9029, 0.1056],
                                     [1.4007, 0.6649, 0.1056],
                                     [1.4007, 0.6649, 0.3852],
                                     [1.4007, 0.9029, 0.3852],
                                     [1.4007, 0.9029, 0.1056]],
                                    [[1.3916, -0.0352, 0.9383],
                                     [1.3916, -0.0352, 2.8302],
                                     [1.3916, 0.8986, 2.8302],
                                     [1.3916, 0.8986, 0.9383],
                                     [1.5112, -0.0352, 0.9383],
                                     [1.5112, -0.0352, 2.8302],
                                     [1.5112, 0.8986, 2.8302],
                                     [1.5112, 0.8986, 0.9383]]])
    torch.allclose(boxes.corners, expected_tensor)
tests/test_pipeline/test_outdoor_pipeline.py  0 → 100644  View file @ f2b01720
import numpy as np
import torch

from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.datasets.pipelines import Compose


def test_outdoor_aug_pipeline():
    point_cloud_range = [0, -40, -3, 70.4, 40, 1]
    class_names = ['Car']
    np.random.seed(0)

    train_pipeline = [
        dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
        dict(
            type='ObjectNoise',
            num_try=100,
            loc_noise_std=[1.0, 1.0, 0.5],
            global_rot_range=[0.0, 0.0],
            rot_uniform_noise=[-0.78539816, 0.78539816]),
        dict(type='RandomFlip3D', flip_ratio=0.5),
        dict(
            type='GlobalRotScale',
            rot_uniform_noise=[-0.78539816, 0.78539816],
            scaling_uniform_noise=[0.95, 1.05]),
        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='PointShuffle'),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
    ]
    pipeline = Compose(train_pipeline)

    gt_bboxes_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[2.16902428e+01, -4.06038128e-02, -1.61906636e+00,
              1.65999997e+00, 3.20000005e+00, 1.61000001e+00, -1.53999996e+00],
             [7.05006886e+00, -6.57459593e+00, -1.60107934e+00,
              2.27999997e+00, 1.27799997e+01, 3.66000009e+00, 1.54999995e+00],
             [2.24698811e+01, -6.69203758e+00, -1.50118136e+00,
              2.31999993e+00, 1.47299995e+01, 3.64000010e+00, 1.59000003e+00],
             [3.48291969e+01, -7.09058380e+00, -1.36622977e+00,
              2.31999993e+00, 1.00400000e+01, 3.60999990e+00, 1.61000001e+00],
             [4.62394600e+01, -7.75838804e+00, -1.32405007e+00,
              2.33999991e+00, 1.28299999e+01, 3.63000011e+00, 1.63999999e+00],
             [2.82966995e+01, -5.55755794e-01, -1.30332506e+00,
              1.47000003e+00, 2.23000002e+00, 1.48000002e+00, -1.57000005e+00],
             [2.66690197e+01, 2.18230209e+01, -1.73605704e+00,
              1.55999994e+00, 3.48000002e+00, 1.39999998e+00, -1.69000006e+00],
             [3.13197803e+01, 8.16214371e+00, -1.62177873e+00,
              1.74000001e+00, 3.76999998e+00, 1.48000002e+00, 2.78999996e+00],
             [4.34395561e+01, -1.95209332e+01, -1.20757008e+00,
              1.69000006e+00, 4.09999990e+00, 1.40999997e+00, -1.53999996e+00],
             [3.29882965e+01, -3.79360509e+00, -1.69245458e+00,
              1.74000001e+00, 4.09000015e+00, 1.49000001e+00, -1.52999997e+00],
             [3.85469360e+01, 8.35060215e+00, -1.31423414e+00,
              1.59000003e+00, 4.28000021e+00, 1.45000005e+00, 1.73000002e+00],
             [2.22492104e+01, -1.13536005e+01, -1.38272512e+00,
              1.62000000e+00, 3.55999994e+00, 1.71000004e+00, 2.48000002e+00],
             [3.36115799e+01, -1.97708054e+01, -4.92827654e-01,
              1.64999998e+00, 3.54999995e+00, 1.79999995e+00, -1.57000005e+00],
             [9.85029602e+00, -1.51294518e+00, -1.66834795e+00,
              1.59000003e+00, 3.17000008e+00, 1.38999999e+00,
              -8.39999974e-01]],
            dtype=torch.float32))
    gt_labels_3d = np.array([0, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    results = dict(
        pts_filename='tests/data/kitti/a.bin',
        ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
        bbox3d_fields=[],
    )

    output = pipeline(results)

    expected_tensor = torch.tensor(
        [[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414, -1.9216],
         [7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041, 1.6629],
         [20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850, 2.6513],
         [32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562, 2.6498],
         [43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754, 2.0146],
         [25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170, -0.7141],
         [16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404, -0.7153],
         [24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170, 1.3970],
         [45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499, -0.6886],
         [30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265, -0.7241],
         [32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882, 2.4186],
         [22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372, 0.3928],
         [37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233, -1.4862],
         [8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308, -1.7212]])
    assert torch.allclose(
        output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)


def test_outdoor_velocity_aug_pipeline():
    point_cloud_range = [-50, -50, -5, 50, 50, 3]
    class_names = [
        'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
        'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
    ]
    np.random.seed(0)

    train_pipeline = [
        dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
        dict(
            type='GlobalRotScale',
            rot_uniform_noise=[-0.3925, 0.3925],
            scaling_uniform_noise=[0.95, 1.05],
            trans_normal_noise=[0, 0, 0]),
        dict(type='RandomFlip3D', flip_ratio=0.5),
        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='PointShuffle'),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
    ]
    pipeline = Compose(train_pipeline)

    gt_bboxes_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[-5.2422e+00, 4.0021e+01, -4.7643e-01, 2.0620e+00, 4.4090e+00,
              1.5480e+00, -1.4880e+00, 8.5338e-03, 4.4934e-02],
             [-2.6675e+01, 5.5950e+00, -1.3053e+00, 3.4300e-01, 4.5800e-01,
              7.8200e-01, -4.6276e+00, -4.3284e-04, -1.8543e-03],
             [-5.8098e+00, 3.5409e+01, -6.6511e-01, 2.3960e+00, 3.9690e+00,
              1.7320e+00, -4.6520e+00, 0.0000e+00, 0.0000e+00],
             [-3.1309e+01, 1.0901e+00, -1.0561e+00, 1.9440e+00, 3.8570e+00,
              1.7230e+00, -2.8143e+00, -2.7606e-02, -8.0573e-02],
             [-4.5642e+01, 2.0136e+01, -2.4681e-02, 1.9870e+00, 4.4400e+00,
              1.9420e+00, 2.8336e-01, 0.0000e+00, 0.0000e+00],
             [-5.1617e+00, 1.8305e+01, -1.0879e+00, 2.3230e+00, 4.8510e+00,
              1.3710e+00, -1.5803e+00, 0.0000e+00, 0.0000e+00],
             [-2.5285e+01, 4.1442e+00, -1.2713e+00, 1.7550e+00, 1.9890e+00,
              2.2200e+00, -4.4900e+00, -3.1784e-02, -1.5291e-01],
             [-2.2611e+00, 1.9170e+01, -1.1452e+00, 9.1900e-01, 1.1230e+00,
              1.9310e+00, 4.7790e-02, 6.7684e-02, -1.7537e+00],
             [-6.5878e+01, 1.3500e+01, -2.2528e-01, 1.8200e+00, 3.8520e+00,
              1.5450e+00, -2.8757e+00, 0.0000e+00, 0.0000e+00],
             [-5.4490e+00, 2.8363e+01, -7.7275e-01, 2.2360e+00, 3.7540e+00,
              1.5590e+00, -4.6520e+00, -7.9736e-03, 7.7207e-03]],
            dtype=torch.float32),
        box_dim=9)
    gt_labels_3d = np.array([0, 8, 0, 0, 0, 0, -1, 7, 0, 0])
    results = dict(
        pts_filename='tests/data/kitti/a.bin',
        ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
        bbox3d_fields=[],
    )

    output = pipeline(results)

    expected_tensor = torch.tensor(
        [[-3.7849e+00, -4.1057e+01, -4.8668e-01, 2.1064e+00, 4.5039e+00,
          1.5813e+00, -1.6919e+00, 1.0469e-02, -4.5533e-02],
         [-2.7010e+01, -6.7551e+00, -1.3334e+00, 3.5038e-01, 4.6786e-01,
          7.9883e-01, 1.4477e+00, -5.1440e-04, 1.8758e-03],
         [-4.5448e+00, -3.6372e+01, -6.7942e-01, 2.4476e+00, 4.0544e+00,
          1.7693e+00, 1.4721e+00, 0.0000e+00, -0.0000e+00],
         [-3.1916e+01, -2.3379e+00, -1.0788e+00, 1.9858e+00, 3.9400e+00,
          1.7601e+00, -3.6564e-01, -3.1333e-02, 8.1166e-02],
         [-4.5802e+01, -2.2340e+01, -2.5213e-02, 2.0298e+00, 4.5355e+00,
          1.9838e+00, 2.8199e+00, 0.0000e+00, -0.0000e+00],
         [-4.5526e+00, -1.8887e+01, -1.1114e+00, 2.3730e+00, 4.9554e+00,
          1.4005e+00, -1.5997e+00, 0.0000e+00, -0.0000e+00],
         [-2.5648e+01, -5.2197e+00, -1.2987e+00, 1.7928e+00, 2.0318e+00,
          2.2678e+00, 1.3100e+00, -3.8428e-02, 1.5485e-01],
         [-1.5578e+00, -1.9657e+01, -1.1699e+00, 9.3878e-01, 1.1472e+00,
          1.9726e+00, 3.0555e+00, 4.5907e-04, 1.7928e+00],
         [-4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00,
          1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03]])
    assert torch.allclose(
        output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
tests/test_semantic_heads.py  View file @ f2b01720
import pytest
import torch

+from mmdet3d.core.bbox import LiDARInstance3DBoxes


def test_PointwiseSemanticHead():
    # PointwiseSemanticHead currently only supports the GPU version.
...
@@ -47,19 +49,29 @@ def test_PointwiseSemanticHead():
                                [1, 35, 930, 469]],
                               dtype=torch.int32).cuda()
    # n, 4(batch, ind_x, ind_y, ind_z)
    voxel_dict = dict(voxel_centers=voxel_centers, coors=coordinates)
-    gt_bboxes = list(
-        torch.tensor(
-            [[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
-             [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],
-            dtype=torch.float32).cuda())
+    gt_bboxes = [
+        LiDARInstance3DBoxes(
+            torch.tensor(
+                [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
+                dtype=torch.float32).cuda()),
+        LiDARInstance3DBoxes(
+            torch.tensor(
+                [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]],
+                dtype=torch.float32).cuda())
+    ]
    # batch size is 2 in the unit test
    gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())

    # test get_targets
    target_dict = self.get_targets(voxel_dict, gt_bboxes, gt_labels)
    assert target_dict['seg_targets'].shape == torch.Size(
        [voxel_features.shape[0]])
    assert torch.allclose(
        target_dict['seg_targets'],
        target_dict['seg_targets'].new_tensor([3, -1, 3, 3]))
    assert target_dict['part_targets'].shape == torch.Size(
        [voxel_features.shape[0], 3])
    assert target_dict['part_targets'].sum() == 0

    # test loss
    loss_dict = self.loss(feats_dict, target_dict)
...
@@ -67,7 +79,3 @@ def test_PointwiseSemanticHead():
    assert loss_dict['loss_part'] == 0  # no points in gt_boxes
    total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']
    total_loss.backward()
-
-
-if __name__ == '__main__':
-    test_PointwiseSemanticHead()