OpenDAS / mmdetection3d / Commits / f2b01720

Commit f2b01720, authored Jun 10, 2020 by liyinhao

Merge branch 'master' into process_raw_data

Parents: 08c8adb6, 47850641
Showing 20 changed files with 1137 additions and 535 deletions (+1137, -535)
mmdet3d/core/bbox/structures/cam_box3d.py                        +47   -18
mmdet3d/core/bbox/structures/depth_box3d.py                      +203   -0
mmdet3d/core/bbox/structures/lidar_box3d.py                      +83   -20
mmdet3d/core/optimizer/cocktail_constructor.py                   +1     -1
mmdet3d/core/optimizer/cocktail_optimizer.py                     +9     -2
mmdet3d/datasets/kitti_dataset.py                                +6     -5
mmdet3d/datasets/nuscenes_dataset.py                             +8     -5
mmdet3d/datasets/pipelines/formating.py                          +11    -3
mmdet3d/datasets/pipelines/train_aug.py                          +21   -34
mmdet3d/models/dense_heads/train_mixins.py                       +2     -0
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py          +2     -4
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py   +28   -17
mmdet3d/models/roi_heads/part_aggregation_roi_head.py            +5     -5
mmdet3d/models/voxel_encoders/__init__.py                        +4     -5
mmdet3d/models/voxel_encoders/pillar_encoder.py                  +40  -184
mmdet3d/models/voxel_encoders/utils.py                           +13   -31
mmdet3d/models/voxel_encoders/voxel_encoder.py                   +152 -180
tests/test_box3d.py                                              +243  -12
tests/test_pipeline/test_outdoor_pipeline.py                     +242   -0
tests/test_semantic_heads.py                                     +17    -9
mmdet3d/core/bbox/structures/cam_box3d.py

...
@@ -11,10 +11,10 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
     Coordinates in camera:

     .. code-block:: none

-                z front
+                z front (yaw=0.5*pi)
                /
               /
              0 ------> x right
+             0 ------> x right (yaw=0)
              |
              |
              v
...
@@ -22,11 +22,15 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
     The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
     and the yaw is around the y axis, thus the rotation axis=1.
+    The yaw is 0 at the positive direction of x axis, and increases from
+    the positive direction of x to the positive direction of z.

     Attributes:
         tensor (torch.Tensor): float matrix of N x box_dim.
         box_dim (int): integer indicates the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
+        with_yaw (bool): if False, the value of yaw will be set to 0 as
+            minmax boxes.
     """

     @property
...
@@ -75,7 +79,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         """Calculate the coordinates of corners of all the boxes.

         Convert the boxes to corners in clockwise order, in the form of
-        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
+        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)

         .. code-block:: none
...
@@ -85,7 +89,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
             (x0, y0, z1) + ----------- + (x1, y0, z1)
                         /|            / |
                        / |           /  |
-        (x0, y0, z0) + ----------- +   + (x1, y1, z0)
+        (x0, y0, z0) + ----------- +   + (x1, y1, z1)
                      |  /      .   |  /
                      | / origin    | /
         (x0, y1, z0) + ----------- + -------> x right
...
@@ -123,7 +127,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         return self.tensor[:, [0, 2, 3, 5, 6]]

     @property
-    def nearset_bev(self):
+    def nearest_bev(self):
         """Calculate the 2D bounding boxes in BEV without rotation

         Returns:
...
@@ -150,11 +154,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         """Rotate the boxes

         Args:
-            angles (float | torch.Tensor): rotation angle
-
-        Returns:
-            None if `return_rot_mat=False`,
-            torch.Tensor if `return_rot_mat=True`
+            angle (float | torch.Tensor): rotation angle
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
...
@@ -166,13 +166,23 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
         self.tensor[:, 6] += angle

-    def flip(self):
-        """Flip the boxes in horizontal direction
+    def flip(self, bev_direction='horizontal'):
+        """Flip the boxes in BEV along given BEV direction

-        In CAM coordinates, it flips the x axis.
+        In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
+
+        Args:
+            bev_direction (str): Flip direction (horizontal or vertical).
         """
-        self.tensor[:, 0::7] = -self.tensor[:, 0::7]
-        self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        assert bev_direction in ('horizontal', 'vertical')
+        if bev_direction == 'horizontal':
+            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        elif bev_direction == 'vertical':
+            self.tensor[:, 2::7] = -self.tensor[:, 2::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6]

     def in_range_bev(self, box_range):
         """Check whether the boxes are in the given range
...
@@ -188,8 +198,8 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         TODO: check whether this will affect the performance

         Returns:
-            a binary vector, indicating whether each box is inside
-            the reference range.
+            torch.Tensor: Indicating whether each box is inside
+                the reference range.
         """
         in_range_flags = ((self.tensor[:, 0] > box_range[0])
                           & (self.tensor[:, 2] > box_range[1])
...
@@ -230,3 +240,22 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)
         overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
         return overlaps_h
+
+    def convert_to(self, dst, rt_mat=None):
+        """Convert self to `dst` mode.
+
+        Args:
+            dst (BoxMode): the target Box mode
+            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
+                matrix between different coordinates. Defaults to None.
+                The conversion from `src` coordinates to `dst` coordinates
+                usually comes along the change of sensors, e.g., from camera
+                to LiDAR. This requires a transformation matrix.
+
+        Returns:
+            BaseInstance3DBoxes: The converted box of the same type
+                in the `dst` mode.
+        """
+        from .box_3d_mode import Box3DMode
+        return Box3DMode.convert(
+            box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
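For reference, the arithmetic behind the reworked flip can be exercised outside the class; the following is a minimal standalone sketch of the same in-place update on a raw (N, 7) camera-box tensor, not the mmdet3d method itself:

    import numpy as np
    import torch

    def flip_cam_boxes_bev(boxes, bev_direction='horizontal', with_yaw=True):
        """Mirror the flip arithmetic from the diff on a raw (N, 7) CAM-box
        tensor (x, y, z, x_size, y_size, z_size, yaw); `boxes` is modified
        in place, matching the in-place style of the original method."""
        assert bev_direction in ('horizontal', 'vertical')
        if bev_direction == 'horizontal':
            boxes[:, 0::7] = -boxes[:, 0::7]        # negate x
            if with_yaw:
                boxes[:, 6] = -boxes[:, 6] + np.pi  # mirror yaw about x
        elif bev_direction == 'vertical':
            boxes[:, 2::7] = -boxes[:, 2::7]        # negate z (depth axis)
            if with_yaw:
                boxes[:, 6] = -boxes[:, 6]
        return boxes

    boxes = torch.tensor([[1.0, 1.5, 8.0, 1.6, 1.7, 4.0, 0.3]])
    print(flip_cam_boxes_bev(boxes.clone(), 'horizontal'))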
mmdet3d/core/bbox/structures/depth_box3d.py (new file, 0 → 100644; all lines added)

import numpy as np
import torch

from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis


class DepthInstance3DBoxes(BaseInstance3DBoxes):
    """3D boxes of instances in Depth coordinates

    Coordinates in Depth:

    .. code-block:: none

        up z    y front (yaw=0.5*pi)
           ^   ^
           |  /
           | /
           0 ------> x right (yaw=0)

    The relative coordinate of bottom center in a Depth box is [0.5, 0.5, 0],
    and the yaw is around the z axis, thus the rotation axis=2.
    The yaw is 0 at the positive direction of x axis, and increases from
    the positive direction of x to the positive direction of y.

    Attributes:
        tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): integer indicates the dimension of a box
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
        with_yaw (bool): if False, the value of yaw will be set to 0 as
            minmax boxes.
    """

    @property
    def gravity_center(self):
        """Calculate the gravity center of all the boxes.

        Returns:
            torch.Tensor: a tensor with center of each box.
        """
        bottom_center = self.bottom_center
        gravity_center = torch.zeros_like(bottom_center)
        gravity_center[:, :2] = bottom_center[:, :2]
        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
        return gravity_center

    @property
    def corners(self):
        """Calculate the coordinates of corners of all the boxes.

        Convert the boxes to corners in clockwise order, in form of
        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)

        .. code-block:: none

                                up z
                 front y           ^
                      /            |
                     /             |
       (x0, y1, z1) + ----------- + (x1, y1, z1)
                   /|            / |
                  / |           /  |
    (x0, y0, z1) + ----------- +   + (x1, y1, z0)
                 |  /      .   |  /
                 | / origin    | /
    (x0, y0, z0) + ----------- + --------> right x
                              (x1, y0, z0)

        Returns:
            torch.Tensor: corners of each box with size (N, 8, 3)
        """
        dims = self.dims
        corners_norm = torch.from_numpy(
            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
                device=dims.device, dtype=dims.dtype)

        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
        # use relative origin [0.5, 0.5, 0]
        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])

        # rotate around z axis
        corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)
        corners += self.tensor[:, :3].view(-1, 1, 3)
        return corners

    @property
    def bev(self):
        """Calculate the 2D bounding boxes in BEV with rotation

        Returns:
            torch.Tensor: a nx5 tensor of 2D BEV box of each box.
                The box is in XYWHR format.
        """
        return self.tensor[:, [0, 1, 3, 4, 6]]

    @property
    def nearest_bev(self):
        """Calculate the 2D bounding boxes in BEV without rotation

        Returns:
            torch.Tensor: a tensor of 2D BEV box of each box.
        """
        # Obtain BEV boxes with rotation in XYWHR format
        bev_rotated_boxes = self.bev
        # convert the rotation to a valid range
        rotations = bev_rotated_boxes[:, -1]
        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))

        # find the center of boxes
        conditions = (normed_rotations > np.pi / 4)[..., None]
        bboxes_xywh = torch.where(conditions,
                                  bev_rotated_boxes[:, [0, 1, 3, 2]],
                                  bev_rotated_boxes[:, :4])

        centers = bboxes_xywh[:, :2]
        dims = bboxes_xywh[:, 2:]
        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
        return bev_boxes

    def rotate(self, angle):
        """Rotate the boxes

        Args:
            angle (float | torch.Tensor): rotation angle
        """
        if not isinstance(angle, torch.Tensor):
            angle = self.tensor.new_tensor(angle)
        rot_sin = torch.sin(angle)
        rot_cos = torch.cos(angle)
        rot_mat = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
                                          [rot_sin, rot_cos, 0],
                                          [0, 0, 1]])
        self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat.T
        if self.with_yaw:
            self.tensor[:, 6] -= angle
        else:
            corners_rot = self.corners @ rot_mat.T
            new_x_size = corners_rot[..., 0].max(
                dim=1, keepdim=True)[0] - corners_rot[..., 0].min(
                    dim=1, keepdim=True)[0]
            new_y_size = corners_rot[..., 1].max(
                dim=1, keepdim=True)[0] - corners_rot[..., 1].min(
                    dim=1, keepdim=True)[0]
            self.tensor[:, 3:5] = torch.cat((new_x_size, new_y_size), dim=-1)

    def flip(self, bev_direction='horizontal'):
        """Flip the boxes in BEV along given BEV direction

        In Depth coordinates, it flips x (horizontal) or y (vertical) axis.

        Args:
            bev_direction (str): Flip direction (horizontal or vertical).
        """
        assert bev_direction in ('horizontal', 'vertical')
        if bev_direction == 'horizontal':
            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
            if self.with_yaw:
                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
        elif bev_direction == 'vertical':
            self.tensor[:, 1::7] = -self.tensor[:, 1::7]
            if self.with_yaw:
                self.tensor[:, 6] = -self.tensor[:, 6]

    def in_range_bev(self, box_range):
        """Check whether the boxes are in the given range

        Args:
            box_range (list | torch.Tensor): the range of box
                (x_min, y_min, x_max, y_max)

        Note:
            In the original implementation of SECOND, checking whether
            a box in the range checks whether the points are in a convex
            polygon, we try to reduce the burden for simpler cases.
            TODO: check whether this will affect the performance

        Returns:
            torch.Tensor: Indicating whether each box is inside
                the reference range.
        """
        in_range_flags = ((self.tensor[:, 0] > box_range[0])
                          & (self.tensor[:, 1] > box_range[1])
                          & (self.tensor[:, 0] < box_range[2])
                          & (self.tensor[:, 1] < box_range[3]))
        return in_range_flags

    def convert_to(self, dst, rt_mat=None):
        """Convert self to `dst` mode.

        Args:
            dst (BoxMode): the target Box mode
            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
                matrix between different coordinates. Defaults to None.
                The conversion from `src` coordinates to `dst` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.

        Returns:
            BaseInstance3DBoxes: The converted box of the same type
                in the `dst` mode.
        """
        from .box_3d_mode import Box3DMode
        return Box3DMode.convert(
            box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
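The nearest_bev property above reduces rotated XYWHR boxes to axis-aligned (x1, y1, x2, y2) boxes by snapping each yaw to the nearest axis. Below is a self-contained sketch of that logic, with a local limit_period standing in for the helper imported from .utils:

    import numpy as np
    import torch

    def limit_period(val, offset=0.5, period=np.pi):
        # Wraps angles into [-offset * period, (1 - offset) * period).
        return val - torch.floor(val / period + offset) * period

    def nearest_bev(bev_rotated_boxes):
        """Axis-aligned BEV boxes from (N, 5) XYWHR boxes, following the
        DepthInstance3DBoxes.nearest_bev logic in the diff."""
        rotations = bev_rotated_boxes[:, -1]
        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
        # if a box is closer to 90 degrees than to 0, swap its w and h
        conditions = (normed_rotations > np.pi / 4)[..., None]
        bboxes_xywh = torch.where(conditions,
                                  bev_rotated_boxes[:, [0, 1, 3, 2]],
                                  bev_rotated_boxes[:, :4])
        centers, dims = bboxes_xywh[:, :2], bboxes_xywh[:, 2:]
        return torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)

    print(nearest_bev(torch.tensor([[0.0, 0.0, 4.0, 2.0, np.pi / 2]])))
    # -> [[-1., -2., 1., 2.]]: the 4 x 2 box rotated 90 degrees becomes 2 x 4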
mmdet3d/core/bbox/structures/lidar_box3d.py

 import numpy as np
 import torch

+from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
 from .base_box3d import BaseInstance3DBoxes
 from .utils import limit_period, rotation_3d_in_axis
...
@@ -11,19 +12,23 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
     Coordinates in LiDAR:

     .. code-block:: none

-                 up z    x front
+                 up z    x front (yaw=0.5*pi)
                     ^   ^
                     |  /
                     | /
-        left y <------ 0
+       (yaw=pi)  left y <------ 0

     The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
     and the yaw is around the z axis, thus the rotation axis=2.
+    The yaw is 0 at the negative direction of y axis, and increases from
+    the negative direction of y to the positive direction of x.

     Attributes:
         tensor (torch.Tensor): float matrix of N x box_dim.
         box_dim (int): integer indicates the dimension of a box
             Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
+        with_yaw (bool): if False, the value of yaw will be set to 0 as
+            minmax boxes.
     """

     @property
...
@@ -44,7 +49,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         """Calculate the coordinates of corners of all the boxes.

         Convert the boxes to corners in clockwise order, in form of
-        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
+        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)

         .. code-block:: none
...
@@ -90,7 +95,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         return self.tensor[:, [0, 1, 3, 4, 6]]

     @property
-    def nearset_bev(self):
+    def nearest_bev(self):
         """Calculate the 2D bounding boxes in BEV without rotation

         Returns:
...
@@ -117,11 +122,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         """Rotate the boxes

         Args:
-            angles (float | torch.Tensor): rotation angle
-
-        Returns:
-            None if `return_rot_mat=False`,
-            torch.Tensor if `return_rot_mat=True`
+            angle (float | torch.Tensor): rotation angle
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
...
@@ -133,13 +134,27 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
         self.tensor[:, 6] += angle

-    def flip(self):
-        """Flip the boxes in horizontal direction
+        if self.tensor.shape[1] == 9:
+            # rotate velo vector
+            self.tensor[:, 7:9] = self.tensor[:, 7:9] @ rot_mat_T[:2, :2]
+
+    def flip(self, bev_direction='horizontal'):
+        """Flip the boxes in BEV along given BEV direction

-        In LIDAR coordinates, it flips the y axis.
+        In LIDAR coordinates, it flips the y (horizontal) or x (vertical)
+        axis.
+
+        Args:
+            bev_direction (str): Flip direction (horizontal or vertical).
         """
-        self.tensor[:, 1::7] = -self.tensor[:, 1::7]
-        self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        assert bev_direction in ('horizontal', 'vertical')
+        if bev_direction == 'horizontal':
+            self.tensor[:, 1::7] = -self.tensor[:, 1::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        elif bev_direction == 'vertical':
+            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6]

     def in_range_bev(self, box_range):
         """Check whether the boxes are in the given range
...
@@ -155,11 +170,59 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
         TODO: check whether this will affect the performance

         Returns:
-            a binary vector, indicating whether each box is inside
-            the reference range.
+            torch.Tensor: Indicating whether each box is inside
+                the reference range.
         """
         in_range_flags = ((self.tensor[:, 0] > box_range[0])
                           & (self.tensor[:, 1] > box_range[1])
                           & (self.tensor[:, 0] < box_range[2])
                           & (self.tensor[:, 1] < box_range[3]))
         return in_range_flags

+    def convert_to(self, dst, rt_mat=None):
+        """Convert self to `dst` mode.
+
+        Args:
+            dst (BoxMode): the target Box mode
+            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
+                matrix between different coordinates. Defaults to None.
+                The conversion from `src` coordinates to `dst` coordinates
+                usually comes along the change of sensors, e.g., from camera
+                to LiDAR. This requires a transformation matrix.
+
+        Returns:
+            BaseInstance3DBoxes: The converted box of the same type
+                in the `dst` mode.
+        """
+        from .box_3d_mode import Box3DMode
+        return Box3DMode.convert(
+            box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
+
+    def enlarged_box(self, extra_width):
+        """Enlarge the length, width and height of boxes
+
+        Args:
+            extra_width (float | torch.Tensor): extra width to enlarge the box
+
+        Returns:
+            :obj:`LiDARInstance3DBoxes`: enlarged boxes
+        """
+        enlarged_boxes = self.tensor.clone()
+        enlarged_boxes[:, 3:6] += extra_width * 2
+        # bottom center z minus extra_width
+        enlarged_boxes[:, 2] -= extra_width
+        return self.new_box(enlarged_boxes)
+
+    def points_in_boxes(self, points):
+        """Find the box which the points are in.
+
+        Args:
+            points (:obj:`torch.Tensor`): points in shape Nx3
+
+        Returns:
+            torch.Tensor: the index of the box where each point is in.
+        """
+        box_idx = points_in_boxes_gpu(
+            points.unsqueeze(0),
+            self.tensor.unsqueeze(0).to(points.device)).squeeze(0)
+        return box_idx
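The new enlarged_box keeps the bottom-center anchor consistent while padding all three dimensions; here is a standalone sketch of the same tensor arithmetic, assuming the (x, y, z, x_size, y_size, z_size, yaw) layout documented above:

    import torch

    def enlarged_box(boxes, extra_width):
        """Sketch of LiDARInstance3DBoxes.enlarged_box on a raw (N, 7)
        tensor: grow each dimension by extra_width on every side, and keep
        the bottom-center anchor valid by shifting z down by extra_width."""
        enlarged = boxes.clone()
        enlarged[:, 3:6] += extra_width * 2  # widen l, w, h symmetrically
        enlarged[:, 2] -= extra_width        # bottom center moves down
        return enlarged

    boxes = torch.tensor([[10.0, 2.0, -1.0, 4.0, 1.8, 1.6, 0.2]])
    print(enlarged_box(boxes, 0.2))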
mmdet3d/core/optimizer/cocktail_constructor.py

-from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
 from mmcv.utils import build_from_cfg
 from mmdet3d.utils import get_root_logger
+from mmdet.core.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
 from .cocktail_optimizer import CocktailOptimizer
...
mmdet3d/core/optimizer/cocktail_optimizer.py

-from mmcv.runner.optimizer import OPTIMIZERS
 from torch.optim import Optimizer
+from mmdet.core.optimizer import OPTIMIZERS


 @OPTIMIZERS.register_module()
 class CocktailOptimizer(Optimizer):
...
@@ -9,6 +8,11 @@ class CocktailOptimizer(Optimizer):
     This optimizer applies the cocktail optimization for multi-modality
     models.

+    Args:
+        optimizers (list[:obj:`torch.optim.Optimizer`]): The list containing
+            different optimizers that optimize different parameters
+        step_intervals (list[int]): Step intervals of each optimizer
     """

     def __init__(self, optimizers, step_intervals=None):
...
@@ -18,6 +22,9 @@ class CocktailOptimizer(Optimizer):
             self.param_groups += optimizer.param_groups
         if not isinstance(step_intervals, list):
             step_intervals = [1] * len(self.optimizers)
+        assert len(step_intervals) == len(optimizers), \
+            '"step_intervals" should contain the same number of intervals as' \
+            f' len(optimizers)={len(optimizers)}, got {step_intervals}'
         self.step_intervals = step_intervals
         self.num_step_updated = 0
...
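The added assertion normalizes step_intervals before validating it against the wrapped optimizers. A hypothetical, dependency-free sketch of just that normalization (normalize_step_intervals is an illustrative name, not part of the class):

    def normalize_step_intervals(optimizers, step_intervals=None):
        # Default to stepping every optimizer on every update.
        if not isinstance(step_intervals, list):
            step_intervals = [1] * len(optimizers)
        # One interval per optimizer, as the new assertion requires.
        assert len(step_intervals) == len(optimizers), (
            '"step_intervals" should contain the same number of intervals as'
            f' len(optimizers)={len(optimizers)}, got {step_intervals}')
        return step_intervals

    print(normalize_step_intervals(['adam', 'sgd']))          # -> [1, 1]
    print(normalize_step_intervals(['adam', 'sgd'], [1, 2]))  # -> [1, 2]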
mmdet3d/datasets/kitti_dataset.py

...
@@ -9,7 +9,7 @@ import torch
 from mmcv.utils import print_log

 from mmdet.datasets import DATASETS
-from ..core.bbox import box_np_ops
+from ..core.bbox import Box3DMode, CameraInstance3DBoxes, box_np_ops
 from .custom_3d import Custom3DDataset
 from .utils import remove_dontcare
...
@@ -87,13 +87,14 @@ class KittiDataset(Custom3DDataset):
         # print(gt_names, len(loc))
         gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                       axis=1).astype(np.float32)

-        # this change gt_bboxes_3d to velodyne coordinates
-        gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
-                                                      Trv2c)
+        # convert gt_bboxes_3d to velodyne coordinates
+        gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
+            Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
         gt_bboxes = annos['bbox']

         selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
-        gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
+        # gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
         gt_bboxes = gt_bboxes[selected].astype('float32')
         gt_names = gt_names[selected]
...
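The dataset now builds boxes in camera coordinates and converts them with rt_mat = np.linalg.inv(rect @ Trv2c). A sketch with made-up calibration values showing why the inverse maps camera-frame points back to velodyne coordinates:

    import numpy as np

    # rect @ Trv2c maps LiDAR (velodyne) points into rectified camera
    # coordinates, so its inverse maps camera-coordinate box centers back
    # to LiDAR. The calibration values below are hypothetical.
    rect = np.eye(4)
    Trv2c = np.eye(4)
    Trv2c[:3, 3] = [0.27, -0.08, -0.72]  # hypothetical translation

    rt_mat = np.linalg.inv(rect @ Trv2c)

    center_cam = np.array([2.0, 1.5, 10.0, 1.0])  # homogeneous camera point
    center_lidar = rt_mat @ center_cam
    print(center_lidar[:3])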
mmdet3d/datasets/nuscenes_dataset.py

...
@@ -7,7 +7,7 @@ import pyquaternion
 from nuscenes.utils.data_classes import Box as NuScenesBox

 from mmdet.datasets import DATASETS
-from ..core.bbox import box_np_ops
+from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
 from .custom_3d import Custom3DDataset
...
@@ -152,10 +152,6 @@ class NuScenesDataset(Custom3DDataset):
         # filter out bbox containing no points
         mask = info['num_lidar_pts'] > 0
         gt_bboxes_3d = info['gt_boxes'][mask]
-        # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
-        # the same as KITTI [0.5, 0.5, 0]
-        box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
-                                        [0.5, 0.5, 0])
         gt_names_3d = info['gt_names'][mask]
         gt_labels_3d = []
         for cat in gt_names_3d:
...
@@ -171,6 +167,13 @@ class NuScenesDataset(Custom3DDataset):
             gt_velocity[nan_mask] = [0.0, 0.0]
             gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity],
                                           axis=-1)

+        # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
+        # the same as KITTI [0.5, 0.5, 0]
+        gt_bboxes_3d = LiDARInstance3DBoxes(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            origin=[0.5, 0.5, 0.5])
+
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
             gt_labels_3d=gt_labels_3d,
...
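Passing origin=[0.5, 0.5, 0.5] tells LiDARInstance3DBoxes that the raw nuScenes centers are gravity centers, which must be shifted to the bottom-center convention ([0.5, 0.5, 0]) the class documents. A sketch of that shift on a raw array (shift_center_origin is illustrative, not the library API):

    import numpy as np

    def shift_center_origin(boxes, src=(0.5, 0.5, 0.5), dst=(0.5, 0.5, 0)):
        """Move the stored reference point of (N, 7+) boxes from the
        relative origin `src` (nuScenes gravity center) to `dst`
        (KITTI-style bottom center)."""
        boxes = boxes.copy()
        dims = boxes[:, 3:6]
        boxes[:, :3] += dims * (np.asarray(dst) - np.asarray(src))
        return boxes

    box = np.array([[0.0, 0.0, 1.0, 4.0, 2.0, 2.0, 0.0]])
    print(shift_center_origin(box))  # z drops by half the height -> 0.0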
mmdet3d/datasets/pipelines/formating.py

 import numpy as np
 from mmcv.parallel import DataContainer as DC

+from mmdet3d.core.bbox import BaseInstance3DBoxes
 from mmdet.datasets.builder import PIPELINES
 from mmdet.datasets.pipelines import to_tensor
...
@@ -39,9 +40,8 @@ class DefaultFormatBundle(object):
             img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
             results['img'] = DC(to_tensor(img), stack=True)
         for key in [
-                'proposals', 'gt_bboxes', 'gt_bboxes_3d', 'gt_bboxes_ignore',
-                'gt_labels', 'gt_labels_3d', 'pts_instance_mask',
-                'pts_semantic_mask'
+                'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
+                'gt_labels_3d', 'pts_instance_mask', 'pts_semantic_mask'
         ]:
             if key not in results:
                 continue
...
@@ -49,6 +49,14 @@ class DefaultFormatBundle(object):
                 results[key] = DC([to_tensor(res) for res in results[key]])
             else:
                 results[key] = DC(to_tensor(results[key]))
+        if 'gt_bboxes_3d' in results:
+            if isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):
+                results['gt_bboxes_3d'] = DC(
+                    results['gt_bboxes_3d'], cpu_only=True)
+            else:
+                results['gt_bboxes_3d'] = DC(
+                    to_tensor(results['gt_bboxes_3d']))
+
         if 'gt_masks' in results:
             results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
         if 'gt_semantic_seg' in results:
...
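The bundle now dispatches on the type of gt_bboxes_3d: box objects become CPU-only containers since they cannot be stacked into a batch tensor, while plain arrays still go through to_tensor. A toy sketch of the same dispatch with stand-in classes (Container and Boxes3D are illustrative only, not mmcv's DataContainer):

    from dataclasses import dataclass

    @dataclass
    class Container:          # stand-in for mmcv's DataContainer
        data: object
        cpu_only: bool = False

    class Boxes3D:            # stand-in for BaseInstance3DBoxes
        pass

    def bundle_gt_bboxes_3d(results):
        if 'gt_bboxes_3d' in results:
            value = results['gt_bboxes_3d']
            if isinstance(value, Boxes3D):
                # box objects carry tensor + metadata; keep them on CPU
                results['gt_bboxes_3d'] = Container(value, cpu_only=True)
            else:
                results['gt_bboxes_3d'] = Container(value)
        return results

    print(bundle_gt_bboxes_3d({'gt_bboxes_3d': Boxes3D()}))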
mmdet3d/datasets/pipelines/train_aug.py

...
@@ -26,12 +26,8 @@ class RandomFlip3D(RandomFlip):
         self.sync_2d = sync_2d

     def random_flip_points(self, gt_bboxes_3d, points):
-        gt_bboxes_3d[:, 1] = -gt_bboxes_3d[:, 1]
-        gt_bboxes_3d[:, 6] = -gt_bboxes_3d[:, 6] + np.pi
+        gt_bboxes_3d.flip()
         points[:, 1] = -points[:, 1]
-        if gt_bboxes_3d.shape[1] == 9:
-            # flip velocitys at the same time
-            gt_bboxes_3d[:, 8] = -gt_bboxes_3d[:, 8]
         return gt_bboxes_3d, points

     def __call__(self, input_dict):
...
@@ -121,10 +117,13 @@ class ObjectSample(object):
             gt_bboxes_2d = input_dict['gt_bboxes']
             # Assume for now 3D & 2D bboxes are the same
             sampled_dict = self.db_sampler.sample_all(
-                gt_bboxes_3d, gt_labels_3d, gt_bboxes_2d=gt_bboxes_2d,
-                img=img)
+                gt_bboxes_3d.tensor.numpy(),
+                gt_labels_3d,
+                gt_bboxes_2d=gt_bboxes_2d,
+                img=img)
         else:
             sampled_dict = self.db_sampler.sample_all(
-                gt_bboxes_3d, gt_labels_3d, img=None)
+                gt_bboxes_3d.tensor.numpy(), gt_labels_3d, img=None)

         if sampled_dict is not None:
             sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
...
@@ -133,8 +132,9 @@ class ObjectSample(object):
             gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
                                           axis=0)
-            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d
-                                           ]).astype(np.float32)
+            gt_bboxes_3d = gt_bboxes_3d.new_box(
+                np.concatenate(
+                    [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))

             points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
             # check the points dimension
...
@@ -178,14 +178,16 @@ class ObjectNoise(object):
         points = input_dict['points']

         # TODO: check this inplace function
+        numpy_box = gt_bboxes_3d.tensor.numpy()
         noise_per_object_v3_(
-            gt_bboxes_3d,
+            numpy_box,
             points,
             rotation_perturb=self.rot_uniform_noise,
             center_noise_std=self.loc_noise_std,
             global_random_rot_range=self.global_rot_range,
             num_try=self.num_try)
-        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
+        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)
         input_dict['points'] = points
         return input_dict
...
@@ -212,7 +214,7 @@ class GlobalRotScale(object):
     def _trans_bbox_points(self, gt_boxes, points):
         noise_trans = np.random.normal(0, self.trans_normal_noise[0], 3).T
         points[:, :3] += noise_trans
-        gt_boxes[:, :3] += noise_trans
+        gt_boxes.translate(noise_trans)
         return gt_boxes, points, noise_trans

     def _rot_bbox_points(self, gt_boxes, points, rotation=np.pi / 4):
...
@@ -221,16 +223,8 @@ class GlobalRotScale(object):
         noise_rotation = np.random.uniform(rotation[0], rotation[1])
         points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle(
             points[:, :3], noise_rotation, axis=2)
-        gt_boxes[:, :3], _ = box_np_ops.rotation_points_single_angle(
-            gt_boxes[:, :3], noise_rotation, axis=2)
-        gt_boxes[:, 6] += noise_rotation
-        if gt_boxes.shape[1] == 9:
-            # rotate velo vector
-            rot_cos = np.cos(noise_rotation)
-            rot_sin = np.sin(noise_rotation)
-            rot_mat_T_bev = np.array([[rot_cos, -rot_sin],
-                                      [rot_sin, rot_cos]],
-                                     dtype=points.dtype)
-            gt_boxes[:, 7:9] = gt_boxes[:, 7:9] @ rot_mat_T_bev
+        gt_boxes.rotate(noise_rotation)
         return gt_boxes, points, rot_mat_T

     def _scale_bbox_points(self,
...
@@ -240,9 +234,7 @@ class GlobalRotScale(object):
                            max_scale=1.05):
         noise_scale = np.random.uniform(min_scale, max_scale)
         points[:, :3] *= noise_scale
-        gt_boxes[:, :6] *= noise_scale
-        if gt_boxes.shape[1] == 9:
-            gt_boxes[:, 7:] *= noise_scale
+        gt_boxes.scale(noise_scale)
         return gt_boxes, points, noise_scale

     def __call__(self, input_dict):
...
@@ -256,7 +248,7 @@ class GlobalRotScale(object):
         gt_bboxes_3d, points, trans_factor = self._trans_bbox_points(
             gt_bboxes_3d, points)

-        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
+        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
         input_dict['points'] = points
         input_dict['pcd_scale_factor'] = scale_factor
         input_dict['pcd_rotation'] = rotation_factor
...
@@ -290,10 +282,6 @@ class ObjectRangeFilter(object):
         self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
         self.bev_range = self.pcd_range[[0, 1, 3, 4]]

-    @staticmethod
-    def limit_period(val, offset=0.5, period=np.pi):
-        return val - np.floor(val / period + offset) * period
-
     @staticmethod
     def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
         """remove gtbox outside training range.
...
@@ -314,14 +302,13 @@ class ObjectRangeFilter(object):
     def __call__(self, input_dict):
         gt_bboxes_3d = input_dict['gt_bboxes_3d']
         gt_labels_3d = input_dict['gt_labels_3d']
-        mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range)
+        mask = gt_bboxes_3d.in_range_bev(self.bev_range)
         gt_bboxes_3d = gt_bboxes_3d[mask]
         gt_labels_3d = gt_labels_3d[mask]

         # limit rad to [-pi, pi]
-        gt_bboxes_3d[:, 6] = self.limit_period(
-            gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi)
+        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)

-        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
+        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
         input_dict['gt_labels_3d'] = gt_labels_3d
         return input_dict
...
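ObjectRangeFilter drops its private limit_period staticmethod in favor of the boxes' own limit_yaw; per the diff, both wrap angles with the same formula, sketched here standalone:

    import numpy as np

    def limit_period(val, offset=0.5, period=np.pi):
        """The formula behind the removed ObjectRangeFilter.limit_period:
        wraps val into [-offset * period, (1 - offset) * period)."""
        return val - np.floor(val / period + offset) * period

    # limit rad to [-pi, pi], as the filter does for yaw angles
    yaw = np.array([3.5, -4.0, 0.3])
    print(limit_period(yaw, offset=0.5, period=2 * np.pi))
    # -> [-2.783..., 2.283..., 0.3]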
mmdet3d/models/dense_heads/train_mixins.py

...
@@ -168,6 +168,8 @@ class AnchorTrainMixin(object):
         labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
         label_weights = anchors.new_zeros(num_valid_anchors,
                                           dtype=torch.float)

         if len(gt_bboxes) > 0:
+            if not isinstance(gt_bboxes, torch.Tensor):
+                gt_bboxes = gt_bboxes.tensor.to(anchors.device)
             assign_result = bbox_assigner.assign(anchors, gt_bboxes,
                                                  gt_bboxes_ignore, gt_labels)
             sampling_result = self.bbox_sampler.sample(assign_result,
                                                        anchors,
...
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py

...
@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module):
                     3,
                     3,
                     padding=1,
                     norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down0',
-                    conv_type='SubMConv3d'))
+                    indice_key='rcnn_down0'))
             merge_conv_channel_last = channel
             down_conv_channel_last = merge_conv_channel_last
...
@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module):
                     3,
                     3,
                     padding=1,
                     norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down1',
-                    conv_type='SubMConv3d'))
+                    indice_key='rcnn_down1'))
             down_conv_channel_last = channel

         self.conv_down.add_module('merge_conv',
...
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py

...
@@ -5,7 +5,6 @@ import torch.nn.functional as F
 from mmdet3d.core import multi_apply
 from mmdet3d.core.bbox import box_torch_ops
 from mmdet3d.models.builder import build_loss
-from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
 from mmdet.models import HEADS
...
@@ -14,7 +13,7 @@ class PointwiseSemanticHead(nn.Module):
     """Semantic segmentation head for point-wise segmentation.

     Predict point-wise segmentation and part regression results for PartA2.
-    See https://arxiv.org/abs/1907.03670 for more details.
+    See `paper <https://arxiv.org/abs/1907.03670>`_ for more details.

     Args:
         in_channels (int): the number of input channel.
...
@@ -65,28 +64,27 @@ class PointwiseSemanticHead(nn.Module):
             seg_preds=seg_preds, part_preds=part_preds,
             part_feats=part_feats)

     def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
-        """generate segmentation and part prediction targets
+        """generate segmentation and part prediction targets
+        for a single sample

         Args:
             voxel_centers (torch.Tensor): shape [voxel_num, 3],
                 the center of voxels
-            gt_bboxes_3d (torch.Tensor): shape [box_num, 7], gt boxes
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt boxes containing
+                tensor of shape [box_num, 7].
             gt_labels_3d (torch.Tensor): shape [box_num], class label of gt

         Returns:
             tuple : segmentation targets with shape [voxel_num]
                 part prediction targets with shape [voxel_num, 3]
         """
-        enlarged_gt_boxes = box_torch_ops.enlarge_box3d_lidar(
-            gt_bboxes_3d, extra_width=self.extra_width)
+        gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
+        enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)

         part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
                                                dtype=torch.float32)
-        box_idx = points_in_boxes_gpu(
-            voxel_centers.unsqueeze(0),
-            gt_bboxes_3d.unsqueeze(0)).squeeze(0)  # -1 ~ box_num
-        enlarge_box_idx = points_in_boxes_gpu(
-            voxel_centers.unsqueeze(0),
-            enlarged_gt_boxes.unsqueeze(0)).squeeze(0).long()  # -1 ~ box_num
+        box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
+        enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
+            voxel_centers).long()  # -1 ~ box_num

         gt_labels_pad = F.pad(
             gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)
...
@@ -95,24 +93,37 @@ class PointwiseSemanticHead(nn.Module):
         ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)
         seg_targets[ignore_flag] = -1

-        for k in range(gt_bboxes_3d.shape[0]):
+        for k in range(len(gt_bboxes_3d)):
             k_box_flag = box_idx == k
             # no point in current box (caused by velodyne reduce)
             if not k_box_flag.any():
                 continue
             fg_voxels = voxel_centers[k_box_flag]
-            transformed_voxels = fg_voxels - gt_bboxes_3d[k, 0:3]
+            transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
             transformed_voxels = box_torch_ops.rotation_3d_in_axis(
                 transformed_voxels.unsqueeze(0),
-                -gt_bboxes_3d[k, 6].view(1),
+                -gt_bboxes_3d.yaw[k].view(1),
                 axis=2)
-            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d[
-                k, 3:6] + voxel_centers.new_tensor([0.5, 0.5, 0])
+            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[
+                k] + voxel_centers.new_tensor([0.5, 0.5, 0])

         part_targets = torch.clamp(part_targets, min=0)
         return seg_targets, part_targets

     def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
+        """generate segmentation and part prediction targets
+
+        Args:
+            voxel_centers (torch.Tensor): shape [voxel_num, 3],
+                the center of voxels
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): list of gt boxes
+                containing tensor of shape [box_num, 7].
+            gt_labels_3d (list[torch.Tensor]): list of GT labels.
+
+        Returns:
+            tuple : segmentation targets with shape [voxel_num]
+                part prediction targets with shape [voxel_num, 3]
+        """
         batch_size = len(gt_labels_3d)
         voxel_center_list = []
         for idx in range(batch_size):
...
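The per-box loop in get_targets_single maps each foreground voxel into its box frame and normalizes it, so part targets land in [0, 1] with the origin at the bottom center. A scalar-level sketch of that arithmetic (the sign convention follows the rotation_3d_in_axis(..., -yaw, axis=2) call in the diff, up to that helper's exact definition):

    import numpy as np

    def part_target(voxel_center, bottom_center, dims, yaw):
        """Shift a voxel center into the box frame, undo the yaw rotation
        around z, then normalize by the box dims; foreground targets end
        up in [0, 1] with (0.5, 0.5, 0) at the bottom center."""
        shifted = np.asarray(voxel_center) - np.asarray(bottom_center)
        c, s = np.cos(-yaw), np.sin(-yaw)
        rot_z = np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])
        local = rot_z @ shifted
        return local / np.asarray(dims) + np.array([0.5, 0.5, 0.0])

    print(part_target([10.0, 2.0, 0.5], [10.0, 2.0, 0.0],
                      [4.0, 2.0, 1.6], 0.3))
    # -> [0.5, 0.5, 0.3125]: centered in x/y, 31% of the way up the box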
mmdet3d/models/roi_heads/part_aggregation_roi_head.py

...
@@ -8,7 +8,7 @@ from ..builder import build_head, build_roi_extractor
 from .base_3droi_head import Base3DRoIHead


-@HEADS.register_module
+@HEADS.register_module()
 class PartAggregationROIHead(Base3DRoIHead):
     """Part aggregation roi head for PartA2"""
...
@@ -174,7 +174,7 @@ class PartAggregationROIHead(Base3DRoIHead):
             cur_proposal_list = proposal_list[batch_idx]
             cur_boxes = cur_proposal_list['boxes_3d']
             cur_labels_3d = cur_proposal_list['labels_3d']
-            cur_gt_bboxes = gt_bboxes_3d[batch_idx]
+            cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
             cur_gt_labels = gt_labels_3d[batch_idx]

             batch_num_gts = 0
...
@@ -189,7 +189,7 @@ class PartAggregationROIHead(Base3DRoIHead):
                     pred_per_cls = (cur_labels_3d == i)
                     cur_assign_res = assigner.assign(
                         cur_boxes[pred_per_cls],
-                        cur_gt_bboxes[gt_per_cls],
+                        cur_gt_bboxes.tensor[gt_per_cls],
                         gt_labels=cur_gt_labels[gt_per_cls])
                     # gather assign_results in different class into one result
                     batch_num_gts += cur_assign_res.num_gts
...
@@ -215,11 +215,11 @@ class PartAggregationROIHead(Base3DRoIHead):
                     batch_gt_labels)
             else:  # for single class
                 assign_result = self.bbox_assigner.assign(
-                    cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
+                    cur_boxes, cur_gt_bboxes.tensor,
+                    gt_labels=cur_gt_labels)
             # sample boxes
             sampling_result = self.bbox_sampler.sample(assign_result,
                                                        cur_boxes,
-                                                       cur_gt_bboxes,
+                                                       cur_gt_bboxes.tensor,
                                                        cur_gt_labels)
             sampling_results.append(sampling_result)
         return sampling_results
...
mmdet3d/models/voxel_encoders/__init__.py

-from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
-from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
-                            VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
+from .pillar_encoder import PillarFeatureNet
+from .voxel_encoder import (DynamicSimpleVFE, DynamicVFE, HardSimpleVFE,
+                            HardVFE)

 __all__ = [
-    'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
-    'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
+    'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',
+    'DynamicSimpleVFE'
 ]
mmdet3d/models/voxel_encoders/pillar_encoder.py

...
@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator

 @VOXEL_ENCODERS.register_module()
 class PillarFeatureNet(nn.Module):
+    """Pillar Feature Net.
+
+    The network prepares the pillar features and performs forward pass
+    through PFNLayers.
+
+    Args:
+        in_channels (int). Number of input features,
+            either x, y, z or x, y, z, r.
+        feat_channels (list[int]). Number of features in each of the
+            N PFNLayers.
+        with_distance (bool). Whether to include Euclidean distance
+            to points.
+        voxel_size (list[float]). Size of voxels, only utilize x and y
+            size.
+        point_cloud_range (list[float]). Point cloud range, only
+            utilizes x and y min.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1),
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int). Number of input features,
-                either x, y, z or x, y, z, r.
-            use_norm (bool). Whether to include BatchNorm.
-            num_filters (list[int]). Number of features in each of the
-                N PFNLayers.
-            with_distance (bool). Whether to include Euclidean distance
-                to points.
-            voxel_size (list[float]). Size of voxels, only utilize x and y
-                size.
-            point_cloud_range (list[float>]). Point cloud range, only
-                utilize x and y min.
-        """
         super(PillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 2
+            in_channels += 2
         if with_distance:
-            num_input_features += 1
+            in_channels += 1
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
         # Create PillarFeatureNet layers
-        self.num_input_features = num_input_features
-        num_filters = [num_input_features] + list(num_filters)
+        self.in_channels = in_channels
+        feat_channels = [in_channels] + list(feat_channels)
         pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
+            if i < len(feat_channels) - 2:
                 last_layer = False
             else:
                 last_layer = True
...
@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module):
                 PFNLayer(
                     in_filters,
                     out_filters,
-                    use_norm,
+                    norm_cfg=norm_cfg,
                     last_layer=last_layer,
                     mode=mode))
         self.pfn_layers = nn.ModuleList(pfn_layers)
...
@@ -122,9 +121,8 @@ class PillarFeatureNet(nn.Module):
 class DynamicPillarFeatureNet(PillarFeatureNet):

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,
...
@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
         """
         super(DynamicPillarFeatureNet, self).__init__(
-            num_input_features,
-            use_norm,
-            num_filters,
+            in_channels,
+            feat_channels,
             with_distance,
             with_cluster_center=with_cluster_center,
             with_voxel_center=with_voxel_center,
             voxel_size=voxel_size,
             point_cloud_range=point_cloud_range,
+            norm_cfg=norm_cfg,
             mode=mode)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         pfn_layers = []
         # TODO: currently only support one PFNLayer
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
...
@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
             features = torch.cat([point_feats, feat_per_point], dim=1)

         return voxel_feats, voxel_coors
-
-
-@VOXEL_ENCODERS.register_module()
-class AlignedPillarFeatureNet(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
-                 with_distance=False,
-                 with_cluster_center=True,
-                 with_voxel_center=True,
-                 voxel_size=(0.2, 0.2, 4),
-                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
-                 mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int): Number of input features, either x,
-                y, z or x, y, z, r.
-            use_norm (bool): Whether to include BatchNorm.
-            num_filters (list[int]): Number of features in each of the N
-                PFNLayers.
-            with_distance (bool): Whether to include Euclidean distance to
-                points.
-            voxel_size (list[float]): Size of voxels, only utilize x and y
-                size.
-            point_cloud_range (list[float]): Point cloud range, only
-                utilize x and y min.
-        """
-        super(AlignedPillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
-        if with_cluster_center:
-            print('Use cluster center')
-            num_input_features += 3
-        if with_voxel_center:
-            print('Use voxel center')
-            num_input_features += 2
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        self._with_cluster_center = with_cluster_center
-        self._with_voxel_center = with_voxel_center
-        # Create PillarFeatureNet layers
-        num_filters = [num_input_features] + list(num_filters)
-        pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
-                last_layer = False
-            else:
-                last_layer = True
-            pfn_layers.append(
-                PFNLayer(
-                    in_filters,
-                    out_filters,
-                    use_norm,
-                    last_layer=last_layer,
-                    mode=mode))
-        self.pfn_layers = nn.ModuleList(pfn_layers)
-
-        # Need pillar (voxel) size and x/y offset in order to
-        # calculate pillar offset
-        self.vx = voxel_size[0]
-        self.vy = voxel_size[1]
-        self.vz = voxel_size[2]
-        self.x_offset = self.vx / 2 + point_cloud_range[0]
-        self.y_offset = self.vy / 2 + point_cloud_range[1]
-        self.z_offset = self.vz / 2 + point_cloud_range[2]
-
-    def forward(self, features, num_points, coors):
-        features_ls = [features]
-        # Find distance of x, y, and z from cluster center
-        if self._with_cluster_center:
-            points_mean = features[:, :, :3].sum(
-                dim=1, keepdim=True) / num_points.type_as(features).view(
-                    -1, 1, 1)
-            f_cluster = features[:, :, :3] - points_mean
-            features_ls.append(f_cluster)
-
-        x_distance = features[:, :, 0] - (
-            coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-            self.x_offset)
-        y_distance = features[:, :, 1] - (
-            coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-            self.y_offset)
-        z_distance = features[:, :, 2] - (
-            coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
-            self.z_offset)
-        normed_x_distance = 1 - torch.abs(x_distance / self.vx)
-        normed_y_distance = 1 - torch.abs(y_distance / self.vy)
-        normed_z_distance = 1 - torch.abs(z_distance / self.vz)
-        x_mask = torch.gt(normed_x_distance, 0).type_as(features)
-        y_mask = torch.gt(normed_y_distance, 0).type_as(features)
-        z_mask = torch.gt(normed_z_distance, 0).type_as(features)
-        nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
-        aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
-            normed_z_distance).mul(nonzero_points_mask)
-
-        # Find distance of x, y, and z from pillar center
-        if self._with_voxel_center:
-            f_center = features[:, :, :2]
-            f_center[:, :, 0] = f_center[:, :, 0] - (
-                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-                self.x_offset)
-            f_center[:, :, 1] = f_center[:, :, 1] - (
-                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-                self.y_offset)
-            features_ls.append(f_center)
-
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features_ls.append(points_dist)
-
-        # Combine together feature decorations
-        features = torch.cat(features_ls, dim=-1)
-        # The feature decorations were calculated without regard to
-        # whether pillar was empty. Need to ensure that
-        # empty pillars remain set to zeros.
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        features *= mask
-
-        for pfn in self.pfn_layers:
-            if pfn.last_vfe:
-                features = pfn(features, aligned_distance)
-            else:
-                features = pfn(features)
-
-        return features.squeeze()
mmdet3d/models/voxel_encoders/utils.py
@@ -4,28 +4,15 @@ from torch import nn
 from torch.nn import functional as F


-class Empty(nn.Module):
-
-    def __init__(self, *args, **kwargs):
-        super(Empty, self).__init__()
-
-    def forward(self, *args, **kwargs):
-        if len(args) == 1:
-            return args[0]
-        elif len(args) == 0:
-            return None
-        return args
-
-
 def get_paddings_indicator(actual_num, max_num, axis=0):
     """Create boolean mask by the actual number of a padded tensor.

     Args:
-        actual_num ([type]): [description]
-        max_num ([type]): [description]
+        actual_num (torch.Tensor): Actual number of points in each voxel.
+        max_num (int): Max number of points in each voxel.

     Returns:
-        [type]: [description]
+        torch.Tensor: Mask indicates which points are valid inside a voxel.
     """
     actual_num = torch.unsqueeze(actual_num, axis + 1)
     # tiled_actual_num: [N, M, 1]
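The rewritten docstring pins down what `get_paddings_indicator` produces. A quick sketch (not part of the commit) of the mask it builds for padded voxels, mirroring the unsqueeze-and-compare trick above:

import torch

# actual_num: valid points per voxel; max_num: padded length T
actual_num = torch.tensor([2, 0, 3])
max_num = 3
# equivalent to get_paddings_indicator(actual_num, max_num, axis=0)
mask = actual_num.unsqueeze(1) > torch.arange(max_num).view(1, -1)
print(mask)
# tensor([[ True,  True, False],
#         [False, False, False],
#         [ True,  True,  True]])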
...
@@ -52,13 +39,9 @@ class VFELayer(nn.Module):
         self.cat_max = cat_max
         self.max_out = max_out
         # self.units = int(out_channels / 2)

-        if norm_cfg:
-            norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
-            self.norm = norm_layer
-            self.linear = nn.Linear(in_channels, out_channels, bias=False)
-        else:
-            self.norm = Empty(out_channels)
-            self.linear = nn.Linear(in_channels, out_channels, bias=True)
+        self.norm = build_norm_layer(norm_cfg, out_channels)[1]
+        self.linear = nn.Linear(in_channels, out_channels, bias=False)

     def forward(self, inputs):
         # [K, T, 7] tensordot [7, units] = [K, T, units]
...
@@ -89,7 +72,7 @@ class PFNLayer(nn.Module):
     def __init__(self,
                  in_channels,
                  out_channels,
-                 use_norm=True,
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  last_layer=False,
                  mode='max'):
         """ Pillar Feature Net Layer.
...
@@ -100,9 +83,11 @@ class PFNLayer(nn.Module):
         Args:
             in_channels (int): Number of input channels.
             out_channels (int): Number of output channels.
-            use_norm (bool): Whether to include BatchNorm.
+            norm_cfg (dict): Config dict of normalization layers.
             last_layer (bool): If last_layer, there is no concatenation of
                 features.
+            mode (str): Pooling mode to gather features inside voxels.
+                Default to 'max'.
         """
         super().__init__()
...
@@ -112,13 +97,10 @@ class PFNLayer(nn.Module):
             out_channels = out_channels // 2
         self.units = out_channels
-        if use_norm:
-            self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
-            self.linear = nn.Linear(in_channels, self.units, bias=False)
-        else:
-            self.norm = Empty(self.unints)
-            self.linear = nn.Linear(in_channels, self.units, bias=True)
+        self.norm = build_norm_layer(norm_cfg, self.units)[1]
+        self.linear = nn.Linear(in_channels, self.units, bias=False)
+
+        assert mode in ['max', 'avg']
         self.mode = mode

     def forward(self, inputs, num_voxels=None, aligned_distance=None):
...
mmdet3d/models/voxel_encoders/voxel_encoder.py
 import torch
 from mmcv.cnn import build_norm_layer
 from torch import nn
-from torch.nn import functional as F

 from mmdet3d.ops import DynamicScatter
 from .. import builder
 from ..registry import VOXEL_ENCODERS
-from .utils import Empty, VFELayer, get_paddings_indicator
+from .utils import VFELayer, get_paddings_indicator


 @VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractor(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractor, self).__init__()
-        self.name = name
-        assert len(num_filters) == 2
-        num_input_features += 3  # add mean features
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
-        self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
-        if use_norm:
-            self.linear = nn.Linear(num_filters[1], num_filters[1], bias=False)
-            self.norm = nn.BatchNorm1d(num_filters[1], eps=1e-3, momentum=0.01)
-        else:
-            self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
-            self.norm = Empty(num_filters[1])
-
-    def forward(self, features, num_voxels, **kwargs):
-        # features: [concated_num_points, num_voxel_size, 3(4)]
-        # num_voxels: [concated_num_points]
-        # t = time.time()
-        # torch.cuda.synchronize()
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
-        features_relative = features[:, :, :3] - points_mean
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features = torch.cat([features, features_relative, points_dist],
-                                 dim=-1)
-        else:
-            features = torch.cat([features, features_relative], dim=-1)
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        # mask = features.max(dim=2, keepdim=True)[0] != 0
-        # torch.cuda.synchronize()
-        # print("vfe prep forward time", time.time() - t)
-        x = self.vfe1(features)
-        x *= mask
-        x = self.vfe2(x)
-        x *= mask
-        x = self.linear(x)
-        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(
-            0, 2, 1).contiguous()
-        x = F.relu(x)
-        x *= mask
-        # x: [concated_num_points, num_voxel_size, 128]
-        voxelwise = torch.max(x, dim=1)[0]
-        return voxelwise
-
-
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV2(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractorV2, self).__init__()
-        self.name = name
-        assert len(num_filters) > 0
-        num_input_features += 3
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        num_filters = [num_input_features] + num_filters
-        filters_pairs = [[num_filters[i], num_filters[i + 1]]
-                         for i in range(len(num_filters) - 1)]
-        self.vfe_layers = nn.ModuleList(
-            [VFELayer(i, o, use_norm) for i, o in filters_pairs])
-        if use_norm:
-            self.linear = nn.Linear(num_filters[-1], num_filters[-1],
-                                    bias=False)
-            self.norm = nn.BatchNorm1d(num_filters[-1], eps=1e-3,
-                                       momentum=0.01)
-        else:
-            self.linear = nn.Linear(num_filters[-1], num_filters[-1],
-                                    bias=True)
-            self.norm = Empty(num_filters[-1])
-
-    def forward(self, features, num_voxels, **kwargs):
-        # features: [concated_num_points, num_voxel_size, 3(4)]
-        # num_voxels: [concated_num_points]
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
-        features_relative = features[:, :, :3] - points_mean
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features = torch.cat([features, features_relative, points_dist],
-                                 dim=-1)
-        else:
-            features = torch.cat([features, features_relative], dim=-1)
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        for vfe in self.vfe_layers:
-            features = vfe(features)
-            features *= mask
-        features = self.linear(features)
-        features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
-            0, 2, 1).contiguous()
-        features = F.relu(features)
-        features *= mask
-        # x: [concated_num_points, num_voxel_size, 128]
-        voxelwise = torch.max(features, dim=1)[0]
-        return voxelwise
-
-
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV3(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractorV3, self).__init__()
-        self.name = name
+class HardSimpleVFE(nn.Module):
+    """Simple voxel feature encoder used in SECOND
+
+    It simply averages the values of points in a voxel.
+    """
+
+    def __init__(self):
+        super(HardSimpleVFE, self).__init__()

     def forward(self, features, num_points, coors):
         # features: [concated_num_points, num_voxel_size, 3(4)]
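`HardSimpleVFE` keeps only the mean and has no learnable layers. A toy sketch (not from the commit) of the averaging its docstring describes, on two zero-padded voxels:

import torch

# two voxels padded to 3 points each, 4 features (x, y, z, reflectance)
features = torch.zeros(2, 3, 4)
features[0, :2] = torch.tensor([[1., 2., 3., .5], [3., 4., 5., .7]])
features[1, :1] = torch.tensor([[0., 1., 0., .2]])
num_points = torch.tensor([2, 1])

# padding rows are zero, so summing then dividing by the true count is a mean
points_mean = features.sum(dim=1) / num_points.type_as(features).view(-1, 1)
print(points_mean)  # [[2.0, 3.0, 4.0, 0.6], [0.0, 1.0, 0.0, 0.2]]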
...
@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module):
 @VOXEL_ENCODERS.register_module()
-class DynamicVFEV3(nn.Module):
+class DynamicSimpleVFE(nn.Module):
+    """Simple dynamic voxel feature encoder used in DV-SECOND
+
+    It simply averages the values of points in a voxel.
+    But the number of points in a voxel is dynamic and varies.
+
+    Args:
+        voxel_size (tuple[float]): Size of a single voxel
+        point_cloud_range (tuple[float]): Range of the point cloud and voxels
+    """

     def __init__(self,
-                 num_input_features=4,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
-        super(DynamicVFEV3, self).__init__()
+        super(DynamicSimpleVFE, self).__init__()
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

     @torch.no_grad()
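`DynamicSimpleVFE` delegates the per-voxel mean to `DynamicScatter`, so points arrive unpadded with one coordinate row each. A usage sketch under stated assumptions (DynamicScatter is a CUDA op; the builder entry point and the `(batch, z, y, x)` coordinate layout are assumptions, not shown in this hunk):

import torch
from mmdet3d.models import builder

# hypothetical usage; tensors live on GPU because DynamicScatter is CUDA-only
vfe = builder.build_voxel_encoder(dict(type='DynamicSimpleVFE')).cuda()
points = torch.rand(100, 4).cuda()                     # (N, x/y/z/r)
coors = torch.zeros(100, 4, dtype=torch.int32).cuda()  # (N, batch/z/y/x)
voxel_feats, voxel_coors = vfe(points, coors)          # one mean per voxel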
...
@@ -172,10 +54,37 @@ class DynamicVFEV3(nn.Module):
 @VOXEL_ENCODERS.register_module()
 class DynamicVFE(nn.Module):
+    """Dynamic Voxel feature encoder used in DV-SECOND
+
+    It encodes features of voxels and their points. It could also fuse
+    image feature into voxel features in a point-wise manner.
+    The number of points inside the voxel varies.
+
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Default False.
+        with_cluster_center (bool): Whether to use the distance to cluster
+            center of points inside a voxel. Default to False.
+        with_voxel_center (bool): Whether to use the distance to center of
+            voxel for each points inside a voxel. Default to False.
+        voxel_size (tuple[float]): Size of a single voxel. Default to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Default to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode when pooling features of points inside a voxel.
+            Available options include 'max' and 'avg'. Default to 'max'.
+        fusion_layer (dict | None): The config dict of fusion layer used in
+            multi-modal detectors. Default to None.
+        return_point_feats (bool): Whether to return the features of each
+            points. Default to False.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                  with_distance=False,
                  with_cluster_center=False,
                  with_voxel_center=False,
...
@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module):
                  fusion_layer=None,
                  return_point_feats=False):
         super(DynamicVFE, self).__init__()
-        assert len(num_filters) > 0
+        assert mode in ['avg', 'max']
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
         if with_distance:
-            num_input_features += 3
+            in_channels += 3
-        self.num_input_features = num_input_features
+        self.in_channels = in_channels
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
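Each decoration flag in the docstring widens the input. A small bookkeeping sketch of `in_channels` as `__init__` computes it (note the code adds 3 for `with_distance` even though that decoration is 1-D, mirrored here as written):

# channel bookkeeping for DynamicVFE.__init__ above (sketch)
in_channels = 4             # x, y, z, reflectance
with_cluster_center = True  # + offset to the voxel's point centroid
with_voxel_center = True    # + offset to the geometric voxel center
with_distance = False       # + L2 distance decoration

if with_cluster_center:
    in_channels += 3
if with_voxel_center:
    in_channels += 3
if with_distance:
    in_channels += 3        # the diff adds 3 here, though distance is 1-D
print(in_channels)          # 10 -> input width of the first VFE layer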
...
@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module):
         self.point_cloud_range = point_cloud_range
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
...
@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module):
             self.fusion_layer = builder.build_fusion_layer(fusion_layer)

     def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
+        """Map voxel features to its corresponding points.
+
+        Args:
+            pts_coors (torch.Tensor): Voxel coordinate of each point.
+            voxel_mean (torch.Tensor): Voxel features to be mapped.
+            voxel_coors (torch.Tensor): Coordinates of valid voxels.
+
+        Returns:
+            torch.Tensor: Features or centers of each point.
+        """
         # Step 1: scatter voxel into canvas
         # Calculate necessary things for canvas creation
         canvas_z = int(
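A 1-D sketch (not from the commit) of the scatter/gather idea behind `map_voxel_center_to_point`: voxel features are written into a dense canvas indexed by voxel coordinate, then read back once per point:

import torch

voxel_coors = torch.tensor([0, 2, 5])           # coordinates of valid voxels
voxel_mean = torch.tensor([[1.0], [2.0], [3.0]])
pts_coors = torch.tensor([0, 0, 2, 5, 5])       # each point's voxel coordinate

canvas = voxel_mean.new_zeros(6, 1)             # Step 1: scatter into canvas
canvas[voxel_coors] = voxel_mean
center_per_point = canvas[pts_coors]            # Step 2: gather per point
# -> [[1.], [1.], [2.], [3.], [3.]]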
...
@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module):
                 points=None,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxC
-        coors (torch.Tensor): Nx(1+NDim)
+        """Forward functions
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is NxC.
+            coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
+            points (list[torch.Tensor], optional): Raw points used to guide
+                the multi-modality fusion. Defaults to None.
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features
+                and its coordinates. If `return_point_feats` is True, returns
+                feature of each points inside voxels.
+        """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
...
@@ -320,10 +252,36 @@ class DynamicVFE(nn.Module):
 @VOXEL_ENCODERS.register_module()
 class HardVFE(nn.Module):
+    """Voxel feature encoder used in DV-SECOND
+
+    It encodes features of voxels and their points. It could also fuse
+    image feature into voxel features in a point-wise manner.
+
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Default False.
+        with_cluster_center (bool): Whether to use the distance to cluster
+            center of points inside a voxel. Default to False.
+        with_voxel_center (bool): Whether to use the distance to center of
+            voxel for each points inside a voxel. Default to False.
+        voxel_size (tuple[float]): Size of a single voxel. Default to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Default to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode when pooling features of points inside a voxel.
+            Available options include 'max' and 'avg'. Default to 'max'.
+        fusion_layer (dict | None): The config dict of fusion layer used in
+            multi-modal detectors. Default to None.
+        return_point_feats (bool): Whether to return the features of each
+            points. Default to False.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                  with_distance=False,
                  with_cluster_center=False,
                  with_voxel_center=False,
...
@@ -334,14 +292,14 @@ class HardVFE(nn.Module):
                  fusion_layer=None,
                  return_point_feats=False):
         super(HardVFE, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
         if with_distance:
-            num_input_features += 3
+            in_channels += 3
-        self.num_input_features = num_input_features
+        self.in_channels = in_channels
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
...
@@ -357,16 +315,16 @@ class HardVFE(nn.Module):
         self.point_cloud_range = point_cloud_range
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             # TODO: pass norm_cfg to VFE
             # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
-            if i == (len(num_filters) - 2):
+            if i == (len(feat_channels) - 2):
                 cat_max = False
                 max_out = True
                 if fusion_layer:
...
@@ -394,9 +352,20 @@ class HardVFE(nn.Module):
                 coors,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxMxC
-        coors (torch.Tensor): Nx(1+NDim)
+        """Forward functions
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is MxNxC.
+            num_points (torch.Tensor): Number of points in each voxel.
+            coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features
+                and its coordinates. If `return_point_feats` is True, returns
+                feature of each points inside voxels.
+        """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
...
@@ -438,19 +407,29 @@ class HardVFE(nn.Module):
         for i, vfe in enumerate(self.vfe_layers):
             voxel_feats = vfe(voxel_feats)
-            if torch.isnan(voxel_feats).any():
-                import pdb
-                pdb.set_trace()

         if (self.fusion_layer is not None and img_feats is not None):
             voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
                                                 coors, img_feats, img_meta)
-        if torch.isnan(voxel_feats).any():
-            import pdb
-            pdb.set_trace()

         return voxel_feats

     def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
                          img_meta):
+        """Fuse image and point features with mask.
+
+        Args:
+            features (torch.Tensor): Features of voxel, usually it is the
+                values of points in voxels.
+            mask (torch.Tensor): Mask indicates valid features in each voxel.
+            voxel_feats (torch.Tensor): Features of voxels.
+            coors (torch.Tensor): Coordinates of each single voxel.
+            img_feats (list[torch.Tensor]): Multi-scale feature maps of image.
+            img_meta (list(dict)): Meta information of image and points.
+
+        Returns:
+            torch.Tensor: Fused features of each voxel.
+        """
         # the features consist of a batch of points
         batch_size = coors[-1, 0] + 1
         points = []
...
@@ -459,20 +438,13 @@ class HardVFE(nn.Module):
             points.append(features[single_mask][mask[single_mask]])

         point_feats = voxel_feats[mask]
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()
         point_feats = self.fusion_layer(img_feats, points, point_feats,
                                         img_meta)
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()

         voxel_canvas = voxel_feats.new_zeros(
             size=(voxel_feats.size(0), voxel_feats.size(1),
                   point_feats.size(-1)))
         voxel_canvas[mask] = point_feats
         out = torch.max(voxel_canvas, dim=1)[0]
-        if torch.isnan(out).any():
-            import pdb
-            pdb.set_trace()

         return out
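At the end of `fusion_with_mask`, fused point features are scattered back onto a padded canvas and max-pooled per voxel; a toy sketch of that step:

import torch

M, T, C = 2, 3, 4                               # voxels, padded points, channels
mask = torch.tensor([[True, True, False],
                     [True, False, False]])
point_feats = torch.rand(int(mask.sum()), C)    # output of the fusion layer

voxel_canvas = point_feats.new_zeros(M, T, C)   # scatter onto padded canvas
voxel_canvas[mask] = point_feats
out = torch.max(voxel_canvas, dim=1)[0]         # (M, C) fused voxel features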
tests/test_box3d.py
@@ -3,7 +3,7 @@ import pytest
 import torch

 from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
-                               LiDARInstance3DBoxes)
+                               DepthInstance3DBoxes, LiDARInstance3DBoxes)


 def test_lidar_boxes3d():
...
@@ -13,6 +13,46 @@ def test_lidar_boxes3d():
     assert boxes.tensor.shape[0] == 0
     assert boxes.tensor.shape[1] == 7

+    # Test init with origin
+    gravity_center_box = np.array(
+        [[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,
+          4.40900000e+00, 1.54800000e+00, -1.48801203e+00],
+         [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,
+          4.58000000e-01, 7.82000000e-01, -4.62759755e+00],
+         [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,
+          3.96900000e+00, 1.73200000e+00, -4.65203216e+00],
+         [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,
+          3.85700000e+00, 1.72300000e+00, -2.81427027e+00]],
+        dtype=np.float32)
+    bottom_center_box = LiDARInstance3DBoxes(
+        gravity_center_box, origin=[0.5, 0.5, 0.5])
+    expected_tensor = torch.tensor(
+        [[-5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,
+          4.40900000e+00, 1.54800000e+00, -1.48801203e+00],
+         [-2.66751588e+01, 5.59499564e+00, -1.30534586e+00, 3.43000000e-01,
+          4.58000000e-01, 7.82000000e-01, -4.62759755e+00],
+         [-5.80979675e+00, 3.54092357e+01, -6.65110112e-01, 2.39600000e+00,
+          3.96900000e+00, 1.73200000e+00, -4.65203216e+00],
+         [-3.13086877e+01, 1.09007628e+00, -1.05611211e+00, 1.94400000e+00,
+          3.85700000e+00, 1.72300000e+00, -2.81427027e+00]])
+    assert torch.allclose(expected_tensor, bottom_center_box.tensor)
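The expected z values in this new test follow directly from the `origin` shift: with `origin=[0.5, 0.5, 0.5]` the input z is a gravity center, and the stored bottom center sits half a box height lower, e.g.:

z_gravity, height = 2.97570381e-01, 1.54800000e+00
print(z_gravity - height / 2)  # -0.476429619, matching expected_tensor[0]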
     # Test init with numpy array
     np_boxes = np.array(
         [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
...
@@ -70,9 +110,19 @@ def test_lidar_boxes3d():
          [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
          [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
          [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
-    boxes.flip()
+    boxes.flip('horizontal')
     assert torch.allclose(boxes.tensor, expected_tensor)

+    expected_tensor = torch.tensor(
+        [[-1.7802, -2.5162, -1.7501, 1.7500, 3.3900, 1.6500, -1.6616],
+         [-8.9594, -2.4567, -1.6357, 1.5400, 4.0100, 1.5700, -1.5216],
+         [-28.2967, 0.5558, -1.3033, 1.4700, 2.2300, 1.4800, -4.7116],
+         [-26.6690, -21.8230, -1.7361, 1.5600, 3.4800, 1.4000, -4.8316],
+         [-31.3198, -8.1621, -1.6218, 1.7400, 3.7700, 1.4800, -0.3516]])
+    boxes_flip_vert = boxes.clone()
+    boxes_flip_vert.flip('vertical')
+    assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)
+
     # test box rotation
     expected_tensor = torch.tensor(
         [[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
...
@@ -223,7 +273,7 @@ def test_lidar_boxes3d():
          [27.3398, -18.3976, 29.0896, -14.6065]])
     # the pytorch print loses some precision
     assert torch.allclose(
-        boxes.nearset_bev, expected_tensor, rtol=1e-4, atol=1e-7)
+        boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)

     # obtained by the print of the original implementation
     expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00],
...
@@ -269,6 +319,25 @@ def test_lidar_boxes3d():
     # the pytorch print loses some precision
     assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)

+    # test new_box
+    new_box1 = boxes.new_box([[1, 2, 3, 4, 5, 6, 7]])
+    assert torch.allclose(
+        new_box1.tensor,
+        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
+    assert new_box1.device == boxes.device
+    assert new_box1.with_yaw == boxes.with_yaw
+    assert new_box1.box_dim == boxes.box_dim
+
+    new_box2 = boxes.new_box(np.array([[1, 2, 3, 4, 5, 6, 7]]))
+    assert torch.allclose(
+        new_box2.tensor,
+        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
+
+    new_box3 = boxes.new_box(torch.tensor([[1, 2, 3, 4, 5, 6, 7]]))
+    assert torch.allclose(
+        new_box3.tensor,
+        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
+

 def test_boxes_conversion():
     """Test the conversion of boxes between different modes.
...
@@ -284,6 +353,8 @@ def test_boxes_conversion():
          [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
     cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
                                        Box3DMode.CAM)
+    expected_box = lidar_boxes.convert_to(Box3DMode.CAM)
+    assert torch.equal(expected_box.tensor, cam_box_tensor)

     # Some properties should be the same
     cam_boxes = CameraInstance3DBoxes(cam_box_tensor)
...
@@ -310,16 +381,10 @@ def test_boxes_conversion():
                                               Box3DMode.DEPTH, Box3DMode.CAM)
     assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)

-    # test error raise with not supported conversion
-    with pytest.raises(NotImplementedError):
-        Box3DMode.convert(lidar_box_tensor, Box3DMode.LIDAR, Box3DMode.DEPTH)
-    with pytest.raises(NotImplementedError):
-        Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)
-
     # test similar mode conversion
     same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
                                      Box3DMode.DEPTH)
-    assert (same_results == depth_box_tensor).all()
+    assert torch.equal(same_results, depth_box_tensor)

     # test conversion with a given rt_mat
     camera_boxes = CameraInstance3DBoxes(
...
@@ -389,6 +454,35 @@ def test_boxes_conversion():
                                  rt_mat.inverse().numpy())
     assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())

+    # test convert from depth to lidar
+    depth_boxes = torch.tensor(
+        [[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
+         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
+        dtype=torch.float32)
+    depth_boxes = DepthInstance3DBoxes(depth_boxes)
+    depth_to_lidar_box = depth_boxes.convert_to(Box3DMode.LIDAR)
+    expected_box = depth_to_lidar_box.convert_to(Box3DMode.DEPTH)
+    assert torch.equal(depth_boxes.tensor, expected_box.tensor)
+
+    lidar_to_depth_box = Box3DMode.convert(depth_to_lidar_box,
+                                           Box3DMode.LIDAR, Box3DMode.DEPTH)
+    assert torch.allclose(depth_boxes.tensor, lidar_to_depth_box.tensor)
+    assert torch.allclose(depth_boxes.volume, lidar_to_depth_box.volume)
+
+    # test convert from depth to camera
+    depth_to_cam_box = Box3DMode.convert(depth_boxes, Box3DMode.DEPTH,
+                                         Box3DMode.CAM)
+    cam_to_depth_box = Box3DMode.convert(depth_to_cam_box, Box3DMode.CAM,
+                                         Box3DMode.DEPTH)
+    expected_tensor = depth_to_cam_box.convert_to(Box3DMode.DEPTH)
+    assert torch.equal(expected_tensor.tensor, cam_to_depth_box.tensor)
+    assert torch.allclose(depth_boxes.tensor, cam_to_depth_box.tensor)
+    assert torch.allclose(depth_boxes.volume, cam_to_depth_box.volume)
+
+    with pytest.raises(NotImplementedError):
+        # assert invalid convert mode
+        Box3DMode.convert(depth_boxes, Box3DMode.DEPTH, 3)
+

 def test_camera_boxes3d():
     # Test init with numpy array
...
@@ -449,9 +543,19 @@ def test_camera_boxes3d():
          [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
          [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]),
         Box3DMode.LIDAR, Box3DMode.CAM)
-    boxes.flip()
+    boxes.flip('horizontal')
     assert torch.allclose(boxes.tensor, expected_tensor)

+    expected_tensor = torch.tensor(
+        [[2.5162, 1.7501, -1.7802, 3.3900, 1.6500, 1.7500, -1.6616],
+         [2.4567, 1.6357, -8.9594, 4.0100, 1.5700, 1.5400, -1.5216],
+         [-0.5558, 1.3033, -28.2967, 2.2300, 1.4800, 1.4700, -4.7116],
+         [21.8230, 1.7361, -26.6690, 3.4800, 1.4000, 1.5600, -4.8316],
+         [8.1621, 1.6218, -31.3198, 3.7700, 1.4800, 1.7400, -0.3516]])
+    boxes_flip_vert = boxes.clone()
+    boxes_flip_vert.flip('vertical')
+    assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)
+
     # test box rotation
     expected_tensor = Box3DMode.convert(
         torch.tensor(
...
@@ -560,7 +664,7 @@ def test_camera_boxes3d():
     expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
     # the pytorch print loses some precision
     assert torch.allclose(
-        boxes.nearset_bev, expected_tensor, rtol=1e-4, atol=1e-7)
+        boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)

     # obtained by the print of the original implementation
     expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],
...
@@ -659,3 +763,130 @@ def test_boxes3d_overlaps():
         cam_boxes1.overlaps(cam_boxes1, boxes1)
     with pytest.raises(AssertionError):
         boxes1.overlaps(cam_boxes1, boxes1)
+
+
+def test_depth_boxes3d():
+    # test empty initialization
+    empty_boxes = []
+    boxes = DepthInstance3DBoxes(empty_boxes)
+    assert boxes.tensor.shape[0] == 0
+    assert boxes.tensor.shape[1] == 7
+
+    # Test init with numpy array
+    np_boxes = np.array(
+        [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
+         [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971]],
+        dtype=np.float32)
+    boxes_1 = DepthInstance3DBoxes(np_boxes)
+    assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))
+
+    # test properties
+    assert boxes_1.volume.size(0) == 2
+    assert (boxes_1.center == boxes_1.bottom_center).all()
+    expected_tensor = torch.tensor([[1.4856, 2.5299, -0.1093],
+                                    [2.3262, 3.3065, 0.9475]])
+    assert torch.allclose(boxes_1.gravity_center, expected_tensor)
+    expected_tensor = torch.tensor([[1.4856, 2.5299, 0.9385, 2.1404, 3.0601],
+                                    [2.3262, 3.3065, 0.8234, 0.5325, 2.9971]])
+    assert torch.allclose(boxes_1.bev, expected_tensor)
+    expected_tensor = torch.tensor([[1.0164, 1.4597, 1.9548, 3.6001],
+                                    [1.9145, 3.0402, 2.7379, 3.5728]])
+    assert torch.allclose(boxes_1.nearest_bev, expected_tensor, 1e-4)
+    assert repr(boxes) == (
+        'DepthInstance3DBoxes(\n    tensor([], size=(0, 7)))')
+
+    # test init with torch.Tensor
+    th_boxes = torch.tensor(
+        [[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
+         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
+        dtype=torch.float32)
+    boxes_2 = DepthInstance3DBoxes(th_boxes)
+    assert torch.allclose(boxes_2.tensor, th_boxes)
+
+    # test clone/to/device
+    boxes_2 = boxes_2.clone()
+    boxes_1 = boxes_1.to(boxes_2.device)
+
+    # test box concatenation
+    expected_tensor = torch.tensor(
+        [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
+         [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971],
+         [2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
+         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]])
+    boxes = DepthInstance3DBoxes.cat([boxes_1, boxes_2])
+    assert torch.allclose(boxes.tensor, expected_tensor)
+
+    # concatenate empty list
+    empty_boxes = DepthInstance3DBoxes.cat([])
+    assert empty_boxes.tensor.shape[0] == 0
+    assert empty_boxes.tensor.shape[-1] == 7
+
+    # test box flip
+    expected_tensor = torch.tensor(
+        [[-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815],
+         [-2.3262, 3.3065, 0.4426, 0.8234, 0.5325, 1.0099, 0.1445],
+         [-2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 0.0723],
+         [-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815]])
+    boxes.flip(bev_direction='horizontal')
+    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
+    expected_tensor = torch.tensor(
+        [[-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815],
+         [-2.3262, -3.3065, 0.4426, 0.8234, 0.5325, 1.0099, -0.1445],
+         [-2.4593, -2.5870, -0.4321, 0.8597, 0.6193, 1.0204, -0.0723],
+         [-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815]])
+    boxes.flip(bev_direction='vertical')
+    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
+
+    # test box rotation
+    boxes_rot = boxes.clone()
+    expected_tensor = torch.tensor(
+        [[-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355],
+         [-2.4758, -3.1960, 0.4426, 0.8234, 0.5325, 1.0099, -0.0985],
+         [-2.5757, -2.4712, -0.4321, 0.8597, 0.6193, 1.0204, -0.0263],
+         [-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355]])
+    boxes_rot.rotate(-0.04599790655000615)
+    assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3)
+
+    th_boxes = torch.tensor(
+        [[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195,
+          0.27956772],
+         [1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509,
+          1.8919173]],
+        dtype=torch.float32)
+    boxes = DepthInstance3DBoxes(th_boxes, box_dim=6, with_yaw=False)
+    expected_tensor = torch.tensor(
+        [[0.64884546, 0.78390356, 0.10563634, 1.50373348, 0.23795205,
+          0.27956772, 0],
+         [1.45139421, 0.43169443, 0.93829232, 0.11967964, 0.93380373,
+          1.89191735, 0]])
+    boxes_3 = boxes.clone()
+    boxes_3.rotate(-0.04599790655000615)
+    assert torch.allclose(boxes_3.tensor, expected_tensor)
+    boxes.rotate(torch.tensor(-0.04599790655000615))
+    assert torch.allclose(boxes.tensor, expected_tensor)
+
+    # test bbox in_range_bev
+    expected_tensor = torch.tensor([1, 1], dtype=torch.bool)
+    mask = boxes.in_range_bev([0., -40., 70.4, 40.])
+    assert (mask == expected_tensor).all()
+    mask = boxes.nonempty()
+    assert (mask == expected_tensor).all()
+
+    expected_tensor = torch.tensor([[[-0.1030, 0.6649, 0.1056],
+                                     [-0.1030, 0.6649, 0.3852],
+                                     [-0.1030, 0.9029, 0.3852],
+                                     [-0.1030, 0.9029, 0.1056],
+                                     [1.4007, 0.6649, 0.1056],
+                                     [1.4007, 0.6649, 0.3852],
+                                     [1.4007, 0.9029, 0.3852],
+                                     [1.4007, 0.9029, 0.1056]],
+                                    [[1.3916, -0.0352, 0.9383],
+                                     [1.3916, -0.0352, 2.8302],
+                                     [1.3916, 0.8986, 2.8302],
+                                     [1.3916, 0.8986, 0.9383],
+                                     [1.5112, -0.0352, 0.9383],
+                                     [1.5112, -0.0352, 2.8302],
+                                     [1.5112, 0.8986, 2.8302],
+                                     [1.5112, 0.8986, 0.9383]]])
+    torch.allclose(boxes.corners, expected_tensor)
tests/test_pipeline/test_outdoor_pipeline.py (new file, mode 100644)

import numpy as np
import torch

from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.datasets.pipelines import Compose


def test_outdoor_aug_pipeline():
    point_cloud_range = [0, -40, -3, 70.4, 40, 1]
    class_names = ['Car']
    np.random.seed(0)

    train_pipeline = [
        dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
        dict(
            type='ObjectNoise',
            num_try=100,
            loc_noise_std=[1.0, 1.0, 0.5],
            global_rot_range=[0.0, 0.0],
            rot_uniform_noise=[-0.78539816, 0.78539816]),
        dict(type='RandomFlip3D', flip_ratio=0.5),
        dict(
            type='GlobalRotScale',
            rot_uniform_noise=[-0.78539816, 0.78539816],
            scaling_uniform_noise=[0.95, 1.05]),
        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='PointShuffle'),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
    ]
    pipeline = Compose(train_pipeline)

    gt_bboxes_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[2.16902428e+01, -4.06038128e-02, -1.61906636e+00,
              1.65999997e+00, 3.20000005e+00, 1.61000001e+00,
              -1.53999996e+00],
             [7.05006886e+00, -6.57459593e+00, -1.60107934e+00,
              2.27999997e+00, 1.27799997e+01, 3.66000009e+00,
              1.54999995e+00],
             [2.24698811e+01, -6.69203758e+00, -1.50118136e+00,
              2.31999993e+00, 1.47299995e+01, 3.64000010e+00,
              1.59000003e+00],
             [3.48291969e+01, -7.09058380e+00, -1.36622977e+00,
              2.31999993e+00, 1.00400000e+01, 3.60999990e+00,
              1.61000001e+00],
             [4.62394600e+01, -7.75838804e+00, -1.32405007e+00,
              2.33999991e+00, 1.28299999e+01, 3.63000011e+00,
              1.63999999e+00],
             [2.82966995e+01, -5.55755794e-01, -1.30332506e+00,
              1.47000003e+00, 2.23000002e+00, 1.48000002e+00,
              -1.57000005e+00],
             [2.66690197e+01, 2.18230209e+01, -1.73605704e+00,
              1.55999994e+00, 3.48000002e+00, 1.39999998e+00,
              -1.69000006e+00],
             [3.13197803e+01, 8.16214371e+00, -1.62177873e+00,
              1.74000001e+00, 3.76999998e+00, 1.48000002e+00,
              2.78999996e+00],
             [4.34395561e+01, -1.95209332e+01, -1.20757008e+00,
              1.69000006e+00, 4.09999990e+00, 1.40999997e+00,
              -1.53999996e+00],
             [3.29882965e+01, -3.79360509e+00, -1.69245458e+00,
              1.74000001e+00, 4.09000015e+00, 1.49000001e+00,
              -1.52999997e+00],
             [3.85469360e+01, 8.35060215e+00, -1.31423414e+00,
              1.59000003e+00, 4.28000021e+00, 1.45000005e+00,
              1.73000002e+00],
             [2.22492104e+01, -1.13536005e+01, -1.38272512e+00,
              1.62000000e+00, 3.55999994e+00, 1.71000004e+00,
              2.48000002e+00],
             [3.36115799e+01, -1.97708054e+01, -4.92827654e-01,
              1.64999998e+00, 3.54999995e+00, 1.79999995e+00,
              -1.57000005e+00],
             [9.85029602e+00, -1.51294518e+00, -1.66834795e+00,
              1.59000003e+00, 3.17000008e+00, 1.38999999e+00,
              -8.39999974e-01]],
            dtype=torch.float32))
    gt_labels_3d = np.array([0, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    results = dict(
        pts_filename='tests/data/kitti/a.bin',
        ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
        bbox3d_fields=[],
    )

    output = pipeline(results)

    expected_tensor = torch.tensor(
        [[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414, -1.9216],
         [7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041, 1.6629],
         [20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850, 2.6513],
         [32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562, 2.6498],
         [43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754, 2.0146],
         [25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170, -0.7141],
         [16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404, -0.7153],
         [24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170, 1.3970],
         [45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499, -0.6886],
         [30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265, -0.7241],
         [32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882, 2.4186],
         [22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372, 0.3928],
         [37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233, -1.4862],
         [8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308, -1.7212]])
    assert torch.allclose(
        output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)


def test_outdoor_velocity_aug_pipeline():
    point_cloud_range = [-50, -50, -5, 50, 50, 3]
    class_names = [
        'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
        'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
    ]
    np.random.seed(0)

    train_pipeline = [
        dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
        dict(
            type='GlobalRotScale',
            rot_uniform_noise=[-0.3925, 0.3925],
            scaling_uniform_noise=[0.95, 1.05],
            trans_normal_noise=[0, 0, 0]),
        dict(type='RandomFlip3D', flip_ratio=0.5),
        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='PointShuffle'),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
    ]
    pipeline = Compose(train_pipeline)

    gt_bboxes_3d = LiDARInstance3DBoxes(
        torch.tensor(
            [[-5.2422e+00, 4.0021e+01, -4.7643e-01, 2.0620e+00, 4.4090e+00,
              1.5480e+00, -1.4880e+00, 8.5338e-03, 4.4934e-02],
             [-2.6675e+01, 5.5950e+00, -1.3053e+00, 3.4300e-01, 4.5800e-01,
              7.8200e-01, -4.6276e+00, -4.3284e-04, -1.8543e-03],
             [-5.8098e+00, 3.5409e+01, -6.6511e-01, 2.3960e+00, 3.9690e+00,
              1.7320e+00, -4.6520e+00, 0.0000e+00, 0.0000e+00],
             [-3.1309e+01, 1.0901e+00, -1.0561e+00, 1.9440e+00, 3.8570e+00,
              1.7230e+00, -2.8143e+00, -2.7606e-02, -8.0573e-02],
             [-4.5642e+01, 2.0136e+01, -2.4681e-02, 1.9870e+00, 4.4400e+00,
              1.9420e+00, 2.8336e-01, 0.0000e+00, 0.0000e+00],
             [-5.1617e+00, 1.8305e+01, -1.0879e+00, 2.3230e+00, 4.8510e+00,
              1.3710e+00, -1.5803e+00, 0.0000e+00, 0.0000e+00],
             [-2.5285e+01, 4.1442e+00, -1.2713e+00, 1.7550e+00, 1.9890e+00,
              2.2200e+00, -4.4900e+00, -3.1784e-02, -1.5291e-01],
             [-2.2611e+00, 1.9170e+01, -1.1452e+00, 9.1900e-01, 1.1230e+00,
              1.9310e+00, 4.7790e-02, 6.7684e-02, -1.7537e+00],
             [-6.5878e+01, 1.3500e+01, -2.2528e-01, 1.8200e+00, 3.8520e+00,
              1.5450e+00, -2.8757e+00, 0.0000e+00, 0.0000e+00],
             [-5.4490e+00, 2.8363e+01, -7.7275e-01, 2.2360e+00, 3.7540e+00,
              1.5590e+00, -4.6520e+00, -7.9736e-03, 7.7207e-03]],
            dtype=torch.float32),
        box_dim=9)
    gt_labels_3d = np.array([0, 8, 0, 0, 0, 0, -1, 7, 0, 0])
    results = dict(
        pts_filename='tests/data/kitti/a.bin',
        ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
        bbox3d_fields=[],
    )

    output = pipeline(results)

    expected_tensor = torch.tensor(
        [[-3.7849e+00, -4.1057e+01, -4.8668e-01, 2.1064e+00, 4.5039e+00,
          1.5813e+00, -1.6919e+00, 1.0469e-02, -4.5533e-02],
         [-2.7010e+01, -6.7551e+00, -1.3334e+00, 3.5038e-01, 4.6786e-01,
          7.9883e-01, 1.4477e+00, -5.1440e-04, 1.8758e-03],
         [-4.5448e+00, -3.6372e+01, -6.7942e-01, 2.4476e+00, 4.0544e+00,
          1.7693e+00, 1.4721e+00, 0.0000e+00, -0.0000e+00],
         [-3.1916e+01, -2.3379e+00, -1.0788e+00, 1.9858e+00, 3.9400e+00,
          1.7601e+00, -3.6564e-01, -3.1333e-02, 8.1166e-02],
         [-4.5802e+01, -2.2340e+01, -2.5213e-02, 2.0298e+00, 4.5355e+00,
          1.9838e+00, 2.8199e+00, 0.0000e+00, -0.0000e+00],
         [-4.5526e+00, -1.8887e+01, -1.1114e+00, 2.3730e+00, 4.9554e+00,
          1.4005e+00, -1.5997e+00, 0.0000e+00, -0.0000e+00],
         [-2.5648e+01, -5.2197e+00, -1.2987e+00, 1.7928e+00, 2.0318e+00,
          2.2678e+00, 1.3100e+00, -3.8428e-02, 1.5485e-01],
         [-1.5578e+00, -1.9657e+01, -1.1699e+00, 9.3878e-01, 1.1472e+00,
          1.9726e+00, 3.0555e+00, 4.5907e-04, 1.7928e+00],
         [-4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00,
          1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03]])
    assert torch.allclose(
        output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
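The new test drives the full augmentation chain through `Compose`, which builds each config dict into a registered transform and threads a results dict through them in order. A minimal sketch of that contract (seeding NumPy first, since the random transforms draw from its global RNG):

import numpy as np
from mmdet3d.datasets.pipelines import Compose

np.random.seed(0)  # makes the random augmentations reproducible
pipeline = Compose([dict(type='PointShuffle')])
# results = pipeline(dict(points=..., bbox3d_fields=[]))  # dict in, dict out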
tests/test_semantic_heads.py
 import pytest
 import torch

+from mmdet3d.core.bbox import LiDARInstance3DBoxes
+

 def test_PointwiseSemanticHead():
     # PointwiseSemanticHead only support gpu version currently.
...
@@ -47,19 +49,29 @@ def test_PointwiseSemanticHead():
                              [1, 35, 930, 469]],
                             dtype=torch.int32).cuda()  # n, 4(batch, ind_x, ind_y, ind_z)
     voxel_dict = dict(voxel_centers=voxel_centers, coors=coordinates)
-    gt_bboxes = list(
-        torch.tensor(
-            [[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
-             [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],
-            dtype=torch.float32).cuda())
+    gt_bboxes = [
+        LiDARInstance3DBoxes(
+            torch.tensor(
+                [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
+                dtype=torch.float32).cuda()),
+        LiDARInstance3DBoxes(
+            torch.tensor(
+                [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]],
+                dtype=torch.float32).cuda())
+    ]  # batch size is 2 in the unit test
     gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())

     # test get_targets
     target_dict = self.get_targets(voxel_dict, gt_bboxes, gt_labels)
     assert target_dict['seg_targets'].shape == torch.Size(
         [voxel_features.shape[0]])
+    assert torch.allclose(target_dict['seg_targets'],
+                          target_dict['seg_targets'].new_tensor([3, -1, 3, 3]))
     assert target_dict['part_targets'].shape == torch.Size(
         [voxel_features.shape[0], 3])
+    assert target_dict['part_targets'].sum() == 0

     # test loss
     loss_dict = self.loss(feats_dict, target_dict)
...
@@ -67,7 +79,3 @@ def test_PointwiseSemanticHead():
     assert loss_dict['loss_part'] == 0  # no points in gt_boxes
     total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']
     total_loss.backward()
-
-
-if __name__ == '__main__':
-    test_PointwiseSemanticHead()