Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
TS-MODELS-OPT
training
Autonomous-Driving-models
Commits
ba3cd005
Commit
ba3cd005
authored
Apr 08, 2026
by
雍大凯
Browse files
将子模块转换为普通目录
parent
d2b71343
Changes
418
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
5311 additions
and
0 deletions
+5311
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/utils/gaussian.py
...OCC/Flashocc/mmdetection3d/mmdet3d/core/utils/gaussian.py
+158
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/__init__.py
...lashocc/mmdetection3d/mmdet3d/core/visualizer/__init__.py
+5
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/image_vis.py
...ashocc/mmdetection3d/mmdet3d/core/visualizer/image_vis.py
+206
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/open3d_vis.py
...shocc/mmdetection3d/mmdet3d/core/visualizer/open3d_vis.py
+460
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/show_result.py
...hocc/mmdetection3d/mmdet3d/core/visualizer/show_result.py
+291
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/__init__.py
...OCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/__init__.py
+5
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/builder.py
...hOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/builder.py
+16
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/voxel_generator.py
...shocc/mmdetection3d/mmdet3d/core/voxel/voxel_generator.py
+280
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/__init__.py
...shOCC/Flashocc/mmdetection3d/mmdet3d/datasets/__init__.py
+47
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/builder.py
...ashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/builder.py
+47
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d.py
...hOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d.py
+448
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d_seg.py
.../Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d_seg.py
+465
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
...ashocc/mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
+76
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti2d_dataset.py
...lashocc/mmdetection3d/mmdet3d/datasets/kitti2d_dataset.py
+241
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_dataset.py
.../Flashocc/mmdetection3d/mmdet3d/datasets/kitti_dataset.py
+773
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_mono_dataset.py
...hocc/mmdetection3d/mmdet3d/datasets/kitti_mono_dataset.py
+569
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/lyft_dataset.py
...C/Flashocc/mmdetection3d/mmdet3d/datasets/lyft_dataset.py
+567
-0
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/nuscenes_dataset.py
...ashocc/mmdetection3d/mmdet3d/datasets/nuscenes_dataset.py
+657
-0
No files found.
Too many changes to show.
To preserve performance only
418 of 418+
files are displayed.
Plain diff
Email patch
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/utils/gaussian.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
import
torch
def gaussian_2d(shape, sigma=1):
    """Build a 2D gaussian kernel centred in a map of the given shape.

    Args:
        shape (list[int]): Shape of the map as (rows, cols).
        sigma (float, optional): Sigma of the gaussian. Defaults to 1.

    Returns:
        np.ndarray: Generated gaussian map. Entries smaller than machine
            epsilon times the peak value are set to zero.
    """
    half_rows, half_cols = ((extent - 1.) / 2. for extent in shape)
    rows, cols = np.ogrid[-half_rows:half_rows + 1, -half_cols:half_cols + 1]

    kernel = np.exp(-(cols * cols + rows * rows) / (2 * sigma * sigma))
    # Zero out the numerically negligible tail of the kernel.
    negligible = kernel < np.finfo(kernel.dtype).eps * kernel.max()
    kernel[negligible] = 0
    return kernel
def draw_heatmap_gaussian(heatmap, center, radius, k=1):
    """Stamp a gaussian onto a heatmap, keeping the element-wise maximum.

    Args:
        heatmap (torch.Tensor): Heatmap to be masked. It is updated in place.
        center (torch.Tensor): Center coord (x, y) of the gaussian.
        radius (int): Radius of gaussian.
        k (int, optional): Multiple of masked_gaussian. Defaults to 1.

    Returns:
        torch.Tensor: Masked heatmap (the same object passed as ``heatmap``).
    """
    size = 2 * radius + 1
    kernel = gaussian_2d((size, size), sigma=size / 6)

    cx, cy = int(center[0]), int(center[1])
    rows, cols = heatmap.shape[0:2]

    # Limit the stamped window so it stays within the heatmap extent.
    span_left = min(cx, radius)
    span_right = min(cols - cx, radius + 1)
    span_top = min(cy, radius)
    span_bottom = min(rows - cy, radius + 1)

    heatmap_patch = heatmap[cy - span_top:cy + span_bottom,
                            cx - span_left:cx + span_right]
    kernel_patch = torch.from_numpy(
        kernel[radius - span_top:radius + span_bottom,
               radius - span_left:radius + span_right])
    kernel_patch = kernel_patch.to(heatmap.device, torch.float32)

    if min(kernel_patch.shape) > 0 and min(heatmap_patch.shape) > 0:
        # ``out=`` writes through the view, so ``heatmap`` itself is updated.
        torch.max(heatmap_patch, kernel_patch * k, out=heatmap_patch)
    return heatmap
def gaussian_radius(det_size, min_overlap=0.5):
    """Get radius of gaussian.

    Three quadratic equations in the radius are built from the box size and
    ``min_overlap``. For each of them the value
    ``(b + sqrt(b^2 - 4ac)) / 2`` is computed, and the smallest of the three
    values is returned.

    Args:
        det_size (tuple[torch.Tensor]): Size (height, width) of the
            detection result.
        min_overlap (float, optional): Gaussian_overlap. Defaults to 0.5.

    Returns:
        torch.Tensor: Computed radius.
    """
    height, width = det_size
    size_sum = height + width

    def _candidate(a, b, c):
        # Same expression the three cases share: (b + sqrt(b^2 - 4ac)) / 2.
        return (b + torch.sqrt(b**2 - 4 * a * c)) / 2

    candidates = (
        _candidate(1, size_sum,
                   width * height * (1 - min_overlap) / (1 + min_overlap)),
        _candidate(4, 2 * size_sum, (1 - min_overlap) * width * height),
        _candidate(4 * min_overlap, -2 * min_overlap * size_sum,
                   (min_overlap - 1) * width * height),
    )
    return min(candidates)
def get_ellip_gaussian_2D(heatmap, center, radius_x, radius_y, k=1):
    """Generate 2D ellipse gaussian heatmap.

    Args:
        heatmap (Tensor): Input heatmap, the gaussian kernel will cover on
            it and maintain the max value. It is updated in place.
        center (list[int]): Coord (x, y) of gaussian kernel's center.
        radius_x (int): X-axis radius of gaussian kernel.
        radius_y (int): Y-axis radius of gaussian kernel.
        k (int, optional): Coefficient of gaussian kernel. Default: 1.

    Returns:
        out_heatmap (Tensor): Updated heatmap covered by gaussian kernel
            (the same object passed as ``heatmap``).
    """
    kernel = ellip_gaussian2D(
        (radius_x, radius_y),
        sigma_x=(2 * radius_x + 1) / 6,
        sigma_y=(2 * radius_y + 1) / 6,
        dtype=heatmap.dtype,
        device=heatmap.device)

    cx, cy = int(center[0]), int(center[1])
    rows, cols = heatmap.shape[0:2]

    # Limit the stamped window so it stays within the heatmap extent.
    span_left, span_right = min(cx, radius_x), min(cols - cx, radius_x + 1)
    span_top, span_bottom = min(cy, radius_y), min(rows - cy, radius_y + 1)

    row_window = slice(cy - span_top, cy + span_bottom)
    col_window = slice(cx - span_left, cx + span_right)

    kernel_patch = kernel[radius_y - span_top:radius_y + span_bottom,
                          radius_x - span_left:radius_x + span_right]
    # The result is written straight into the matching window of ``heatmap``.
    torch.max(
        heatmap[row_window, col_window],
        kernel_patch * k,
        out=heatmap[row_window, col_window])

    return heatmap
def ellip_gaussian2D(radius,
                     sigma_x,
                     sigma_y,
                     dtype=torch.float32,
                     device='cpu'):
    """Generate 2D ellipse gaussian kernel.

    Args:
        radius (tuple(int)): Ellipse radius (radius_x, radius_y) of gaussian
            kernel.
        sigma_x (int): X-axis sigma of gaussian function.
        sigma_y (int): Y-axis sigma of gaussian function.
        dtype (torch.dtype, optional): Dtype of gaussian tensor.
            Default: torch.float32.
        device (str, optional): Device of gaussian tensor.
            Default: 'cpu'.

    Returns:
        h (Tensor): Gaussian kernel with a
            ``(2 * radius_y + 1) * (2 * radius_x + 1)`` shape. Entries
            smaller than machine epsilon times the peak are set to zero.
    """
    col_offsets = torch.arange(
        -radius[0], radius[0] + 1, dtype=dtype, device=device).view(1, -1)
    row_offsets = torch.arange(
        -radius[1], radius[1] + 1, dtype=dtype, device=device).view(-1, 1)

    # Row and column offsets broadcast to the full 2D grid.
    exponent = (-(col_offsets * col_offsets) / (2 * sigma_x * sigma_x) -
                (row_offsets * row_offsets) / (2 * sigma_y * sigma_y))
    kernel = exponent.exp()

    negligible = kernel < torch.finfo(kernel.dtype).eps * kernel.max()
    kernel[negligible] = 0

    return kernel
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/__init__.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
from
.show_result
import
(
show_multi_modality_result
,
show_result
,
show_seg_result
)
__all__
=
[
'show_result'
,
'show_seg_result'
,
'show_multi_modality_result'
]
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/image_vis.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
cv2
import
numpy
as
np
import
torch
from
matplotlib
import
pyplot
as
plt
def project_pts_on_img(points,
                       raw_img,
                       lidar2img_rt,
                       max_distance=70,
                       thickness=-1):
    """Project the 3D points cloud on 2D image and show it in a cv2 window.

    Args:
        points (numpy.array): 3D points cloud (x, y, z) to visualize.
        raw_img (numpy.array): The numpy array of image. It is copied before
            drawing.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
        max_distance (float, optional): the max distance of the points cloud.
            Default: 70.
        thickness (int, optional): The thickness of 2D points. Default: -1.
    """
    img = raw_img.copy()
    num_points = points.shape[0]
    # Homogeneous Nx4 coordinates whose last column is 1.
    pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
    pts_2d = pts_4d @ lidar2img_rt.T

    # Clamp the depth before dividing so the perspective division below
    # never divides by zero or by a negative depth.
    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]

    # Keep only the points that land inside the image.
    fov_inds = ((pts_2d[:, 0] < img.shape[1])
                & (pts_2d[:, 0] >= 0)
                & (pts_2d[:, 1] < img.shape[0])
                & (pts_2d[:, 1] >= 0))

    imgfov_pts_2d = pts_2d[fov_inds, :3]  # u, v, d

    # ``plt.cm.get_cmap`` was removed in Matplotlib 3.9; ``plt.get_cmap``
    # takes the same (name, lut) arguments and exists in old releases too.
    cmap = plt.get_cmap('hsv', 256)
    cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
    for i in range(imgfov_pts_2d.shape[0]):
        depth = imgfov_pts_2d[i, 2]
        # Colour index is inversely proportional to the depth.
        color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
        cv2.circle(
            img,
            center=(int(np.round(imgfov_pts_2d[i, 0])),
                    int(np.round(imgfov_pts_2d[i, 1]))),
            radius=1,
            color=tuple(color),
            thickness=thickness,
        )
    cv2.imshow('project_pts_img', img.astype(np.uint8))
    cv2.waitKey(100)
def plot_rect3d_on_img(img,
                       num_rects,
                       rect_corners,
                       color=(0, 255, 0),
                       thickness=1):
    """Plot the boundary lines of 3D rectangular on 2D images.

    The lines are drawn onto ``img`` itself; the returned array is a
    ``uint8`` conversion of it.

    Args:
        img (numpy.array): The numpy array of image.
        num_rects (int): Number of 3D rectangulars.
        rect_corners (numpy.array): Coordinates of the corners of 3D
            rectangulars. Should be in the shape of [num_rect, 8, 2].
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.

    Returns:
        numpy.array: The image converted to ``np.uint8``.
    """
    # Pairs of corner indices joined by one of the 12 box edges.
    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
    for i in range(num_rects):
        # ``np.int`` was removed in NumPy 1.24; it was only an alias of the
        # builtin ``int``, so the resulting dtype is unchanged.
        corners = rect_corners[i].astype(int)
        for start, end in line_indices:
            # Hand plain Python ints to OpenCV rather than NumPy scalars.
            cv2.line(img,
                     (int(corners[start, 0]), int(corners[start, 1])),
                     (int(corners[end, 0]), int(corners[end, 1])),
                     color, thickness, cv2.LINE_AA)

    return img.astype(np.uint8)
def draw_lidar_bbox3d_on_img(bboxes3d,
                             raw_img,
                             lidar2img_rt,
                             img_metas,
                             color=(0, 255, 0),
                             thickness=1):
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes3d (:obj:`LiDARInstance3DBoxes`):
            3d bbox in lidar coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image. It is copied before
            drawing.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
        img_metas (dict): Useless here.
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.

    Returns:
        The result of :func:`plot_rect3d_on_img` for the projected corners.
    """
    canvas = raw_img.copy()
    box_corners = bboxes3d.corners
    box_count = box_corners.shape[0]

    # Flatten the 8 corners of every box into homogeneous (N*8, 4) points.
    homogeneous = np.concatenate(
        [box_corners.reshape(-1, 3),
         np.ones((box_count * 8, 1))], axis=-1)

    projection = copy.deepcopy(lidar2img_rt).reshape(4, 4)
    if isinstance(projection, torch.Tensor):
        projection = projection.cpu().numpy()
    projected = homogeneous @ projection.T

    # Clamp the depth before the perspective division.
    projected[:, 2] = np.clip(projected[:, 2], a_min=1e-5, a_max=1e5)
    projected[:, 0] /= projected[:, 2]
    projected[:, 1] /= projected[:, 2]
    corners_uv = projected[..., :2].reshape(box_count, 8, 2)

    return plot_rect3d_on_img(canvas, box_count, corners_uv, color, thickness)
# TODO: remove third parameter in all functions here in favour of img_metas
def draw_depth_bbox3d_on_img(bboxes3d,
                             raw_img,
                             calibs,
                             img_metas,
                             color=(0, 255, 0),
                             thickness=1):
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
            3d bbox in depth coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image. It is copied before
            drawing.
        calibs (dict): Camera calibration information, Rt and K. Not read by
            this function; the projection comes from
            ``img_metas['depth2img']``.
        img_metas (dict): Used in coordinates transformation.
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.

    Returns:
        The result of :func:`plot_rect3d_on_img` for the projected corners.
    """
    from mmdet3d.core.bbox import points_cam2img
    from mmdet3d.models import apply_3d_transformation

    canvas = raw_img.copy()
    metas = copy.deepcopy(img_metas)
    box_corners = bboxes3d.corners
    box_count = box_corners.shape[0]
    flat_corners = box_corners.reshape(-1, 3)

    # Undo the data transformations recorded in the metas first.
    xyz_depth = apply_3d_transformation(
        flat_corners, 'DEPTH', metas, reverse=True)

    # Then project to 2d to get image coords (uv).
    depth2img = xyz_depth.new_tensor(metas['depth2img'])
    uv = points_cam2img(xyz_depth, depth2img)
    uv = (uv - 1).round()
    corners_uv = uv[..., :2].reshape(box_count, 8, 2).numpy()

    return plot_rect3d_on_img(canvas, box_count, corners_uv, color, thickness)
def draw_camera_bbox3d_on_img(bboxes3d,
                              raw_img,
                              cam2img,
                              img_metas,
                              color=(0, 255, 0),
                              thickness=1):
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
            3d bbox in camera coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image. It is copied before
            drawing.
        cam2img (dict): Camera intrinsic matrix,
            denoted as `K` in depth bbox coordinate system. Must have shape
            (3, 3) or (4, 4) once converted to a tensor.
        img_metas (dict): Useless here.
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.

    Returns:
        The result of :func:`plot_rect3d_on_img` for the projected corners.
    """
    from mmdet3d.core.bbox import points_cam2img

    canvas = raw_img.copy()
    intrinsics = copy.deepcopy(cam2img)
    box_corners = bboxes3d.corners
    box_count = box_corners.shape[0]
    flat_corners = box_corners.reshape(-1, 3)

    if not isinstance(intrinsics, torch.Tensor):
        intrinsics = torch.from_numpy(np.array(intrinsics))

    assert intrinsics.shape in (torch.Size([3, 3]), torch.Size([4, 4]))
    intrinsics = intrinsics.float().cpu()

    # project to 2d to get image coords (uv)
    uv = points_cam2img(flat_corners, intrinsics)
    uv = (uv - 1).round()
    corners_uv = uv[..., :2].reshape(box_count, 8, 2).numpy()

    return plot_rect3d_on_img(canvas, box_count, corners_uv, color, thickness)
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/open3d_vis.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
numpy
as
np
import
torch
try
:
import
open3d
as
o3d
from
open3d
import
geometry
except
ImportError
:
raise
ImportError
(
'Please run "pip install open3d" to install open3d first.'
)
def _draw_points(points,
                 vis,
                 points_size=2,
                 point_color=(0.5, 0.5, 0.5),
                 mode='xyz'):
    """Draw points on visualizer.

    Args:
        points (numpy.array | torch.tensor, shape=[N, 3+C]):
            points to visualize. The input is copied before use.
        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
        points_size (int, optional): the size of points to show on visualizer.
            Default: 2.
        point_color (tuple[float], optional): the color of points.
            Default: (0.5, 0.5, 0.5).
        mode (str, optional): indicate type of the input points,
            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.

    Returns:
        tuple: points, color of each point.

    Raises:
        NotImplementedError: If ``mode`` is neither 'xyz' nor 'xyzrgb'.
    """
    vis.get_render_option().point_size = points_size  # set points size

    cloud = points.cpu().numpy() if isinstance(points, torch.Tensor) \
        else points
    cloud = cloud.copy()

    pcd = geometry.PointCloud()
    if mode not in ('xyz', 'xyzrgb'):
        raise NotImplementedError

    pcd.points = o3d.utility.Vector3dVector(cloud[:, :3])
    if mode == 'xyz':
        # One uniform colour repeated for every point.
        points_colors = np.tile(np.array(point_color), (cloud.shape[0], 1))
    else:
        points_colors = cloud[:, 3:6]
        # normalize to [0, 1] for open3d drawing
        within_unit_range = ((points_colors >= 0.0) &
                             (points_colors <= 1.0)).all()
        if not within_unit_range:
            points_colors /= 255.0

    pcd.colors = o3d.utility.Vector3dVector(points_colors)
    vis.add_geometry(pcd)

    return pcd, points_colors
def
_draw_bboxes
(
bbox3d
,
vis
,
points_colors
,
pcd
=
None
,
bbox_color
=
(
0
,
1
,
0
),
points_in_box_color
=
(
1
,
0
,
0
),
rot_axis
=
2
,
center_mode
=
'lidar_bottom'
,
mode
=
'xyz'
):
"""Draw bbox on visualizer and change the color of points inside bbox3d.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points inside bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (bool, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if
isinstance
(
bbox3d
,
torch
.
Tensor
):
bbox3d
=
bbox3d
.
cpu
().
numpy
()
bbox3d
=
bbox3d
.
copy
()
in_box_color
=
np
.
array
(
points_in_box_color
)
for
i
in
range
(
len
(
bbox3d
)):
center
=
bbox3d
[
i
,
0
:
3
]
dim
=
bbox3d
[
i
,
3
:
6
]
yaw
=
np
.
zeros
(
3
)
yaw
[
rot_axis
]
=
bbox3d
[
i
,
6
]
rot_mat
=
geometry
.
get_rotation_matrix_from_xyz
(
yaw
)
if
center_mode
==
'lidar_bottom'
:
center
[
rot_axis
]
+=
dim
[
rot_axis
]
/
2
# bottom center to gravity center
elif
center_mode
==
'camera_bottom'
:
center
[
rot_axis
]
-=
dim
[
rot_axis
]
/
2
# bottom center to gravity center
box3d
=
geometry
.
OrientedBoundingBox
(
center
,
rot_mat
,
dim
)
line_set
=
geometry
.
LineSet
.
create_from_oriented_bounding_box
(
box3d
)
line_set
.
paint_uniform_color
(
bbox_color
)
# draw bboxes on visualizer
vis
.
add_geometry
(
line_set
)
# change the color of points which are in box
if
pcd
is
not
None
and
mode
==
'xyz'
:
indices
=
box3d
.
get_point_indices_within_bounding_box
(
pcd
.
points
)
points_colors
[
indices
]
=
in_box_color
# update points colors
if
pcd
is
not
None
:
pcd
.
colors
=
o3d
.
utility
.
Vector3dVector
(
points_colors
)
vis
.
update_geometry
(
pcd
)
def show_pts_boxes(points,
                   bbox3d=None,
                   show=True,
                   save_path=None,
                   points_size=2,
                   point_color=(0.5, 0.5, 0.5),
                   bbox_color=(0, 1, 0),
                   points_in_box_color=(1, 0, 0),
                   rot_axis=2,
                   center_mode='lidar_bottom',
                   mode='xyz'):
    """Draw bbox and points on visualizer.

    Args:
        points (numpy.array | torch.tensor, shape=[N, 3+C]):
            points to visualize.
        bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):
            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            Defaults to None.
        show (bool, optional): whether to show the visualization results.
            Default: True.
        save_path (str, optional): path to save visualized results.
            Default: None.
        points_size (int, optional): the size of points to show on visualizer.
            Default: 2.
        point_color (tuple[float], optional): the color of points.
            Default: (0.5, 0.5, 0.5).
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points which are in bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (bool, optional): indicate the center of bbox is bottom
            center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points, available
            mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """
    # TODO: support score and class info
    assert 0 <= rot_axis <= 2

    # Open a window and add a coordinate frame at the origin.
    window = o3d.visualization.Visualizer()
    window.create_window()
    axes = geometry.TriangleMesh.create_coordinate_frame(
        size=1, origin=[0, 0, 0])
    window.add_geometry(axes)

    # Points first, so the boxes can recolour the points they contain.
    pcd, points_colors = _draw_points(points, window, points_size,
                                      point_color, mode)

    if bbox3d is not None:
        _draw_bboxes(bbox3d, window, points_colors, pcd, bbox_color,
                     points_in_box_color, rot_axis, center_mode, mode)

    if show:
        window.run()

    if save_path is not None:
        window.capture_screen_image(save_path)

    window.destroy_window()
def
_draw_bboxes_ind
(
bbox3d
,
vis
,
indices
,
points_colors
,
pcd
=
None
,
bbox_color
=
(
0
,
1
,
0
),
points_in_box_color
=
(
1
,
0
,
0
),
rot_axis
=
2
,
center_mode
=
'lidar_bottom'
,
mode
=
'xyz'
):
"""Draw bbox on visualizer and change the color or points inside bbox3d
with indices.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
indices (numpy.array | torch.tensor, shape=[N, M]):
indicate which bbox3d that each point lies in.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (bool, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if
isinstance
(
bbox3d
,
torch
.
Tensor
):
bbox3d
=
bbox3d
.
cpu
().
numpy
()
if
isinstance
(
indices
,
torch
.
Tensor
):
indices
=
indices
.
cpu
().
numpy
()
bbox3d
=
bbox3d
.
copy
()
in_box_color
=
np
.
array
(
points_in_box_color
)
for
i
in
range
(
len
(
bbox3d
)):
center
=
bbox3d
[
i
,
0
:
3
]
dim
=
bbox3d
[
i
,
3
:
6
]
yaw
=
np
.
zeros
(
3
)
# TODO: fix problem of current coordinate system
# dim[0], dim[1] = dim[1], dim[0] # for current coordinate
# yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)
yaw
[
rot_axis
]
=
-
bbox3d
[
i
,
6
]
rot_mat
=
geometry
.
get_rotation_matrix_from_xyz
(
yaw
)
if
center_mode
==
'lidar_bottom'
:
center
[
rot_axis
]
+=
dim
[
rot_axis
]
/
2
# bottom center to gravity center
elif
center_mode
==
'camera_bottom'
:
center
[
rot_axis
]
-=
dim
[
rot_axis
]
/
2
# bottom center to gravity center
box3d
=
geometry
.
OrientedBoundingBox
(
center
,
rot_mat
,
dim
)
line_set
=
geometry
.
LineSet
.
create_from_oriented_bounding_box
(
box3d
)
line_set
.
paint_uniform_color
(
bbox_color
)
# draw bboxes on visualizer
vis
.
add_geometry
(
line_set
)
# change the color of points which are in box
if
pcd
is
not
None
and
mode
==
'xyz'
:
points_colors
[
indices
[:,
i
].
astype
(
np
.
bool
)]
=
in_box_color
# update points colors
if
pcd
is
not
None
:
pcd
.
colors
=
o3d
.
utility
.
Vector3dVector
(
points_colors
)
vis
.
update_geometry
(
pcd
)
def show_pts_index_boxes(points,
                         bbox3d=None,
                         show=True,
                         indices=None,
                         save_path=None,
                         points_size=2,
                         point_color=(0.5, 0.5, 0.5),
                         bbox_color=(0, 1, 0),
                         points_in_box_color=(1, 0, 0),
                         rot_axis=2,
                         center_mode='lidar_bottom',
                         mode='xyz'):
    """Draw bbox and points on visualizer with indices that indicate which
    bbox3d that each point lies in.

    Args:
        points (numpy.array | torch.tensor, shape=[N, 3+C]):
            points to visualize.
        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            Defaults to None.
        show (bool, optional): whether to show the visualization results.
            Default: True.
        indices (numpy.array | torch.tensor, shape=[N, M], optional):
            indicate which bbox3d that each point lies in. Default: None.
        save_path (str, optional): path to save visualized results.
            Default: None.
        points_size (int, optional): the size of points to show on visualizer.
            Default: 2.
        point_color (tuple[float], optional): the color of points.
            Default: (0.5, 0.5, 0.5).
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points which are in bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (bool, optional): indicate the center of bbox is
            bottom center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points,
            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """
    # TODO: support score and class info
    assert 0 <= rot_axis <= 2

    # Open a window and add a coordinate frame at the origin.
    window = o3d.visualization.Visualizer()
    window.create_window()
    axes = geometry.TriangleMesh.create_coordinate_frame(
        size=1, origin=[0, 0, 0])
    window.add_geometry(axes)

    # Points first, so the boxes can recolour the points assigned to them.
    pcd, points_colors = _draw_points(points, window, points_size,
                                      point_color, mode)

    if bbox3d is not None:
        _draw_bboxes_ind(bbox3d, window, indices, points_colors, pcd,
                         bbox_color, points_in_box_color, rot_axis,
                         center_mode, mode)

    if show:
        window.run()

    if save_path is not None:
        window.capture_screen_image(save_path)

    window.destroy_window()
class Visualizer(object):
    r"""Online visualizer implemented with Open3d.

    Args:
        points (numpy.array, shape=[N, 3+C]): Points to visualize. The Points
            cloud is in mode of Coord3DMode.DEPTH (please refer to
            core.structures.coord_3d_mode). May be None, in which case no
            point cloud is drawn and boxes are drawn without recolouring
            any points.
        bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox
            (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            The 3D bbox is in mode of Box3DMode.DEPTH with
            gravity_center (please refer to core.structures.box_3d_mode).
            Default: None.
        save_path (str, optional): path to save visualized results.
            Default: None. Not read by ``__init__``; pass the path to
            :meth:`show` instead.
        points_size (int, optional): the size of points to show on visualizer.
            Default: 2.
        point_color (tuple[float], optional): the color of points.
            Default: (0.5, 0.5, 0.5).
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points which are in bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (bool, optional): indicate the center of bbox is
            bottom center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points,
            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """

    def __init__(self,
                 points,
                 bbox3d=None,
                 save_path=None,
                 points_size=2,
                 point_color=(0.5, 0.5, 0.5),
                 bbox_color=(0, 1, 0),
                 points_in_box_color=(1, 0, 0),
                 rot_axis=2,
                 center_mode='lidar_bottom',
                 mode='xyz'):
        super(Visualizer, self).__init__()
        assert 0 <= rot_axis <= 2

        # init visualizer
        self.o3d_visualizer = o3d.visualization.Visualizer()
        self.o3d_visualizer.create_window()
        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
            size=1, origin=[0, 0, 0])  # create coordinate frame
        self.o3d_visualizer.add_geometry(mesh_frame)

        self.points_size = points_size
        self.point_color = point_color
        self.bbox_color = bbox_color
        self.points_in_box_color = points_in_box_color
        self.rot_axis = rot_axis
        self.center_mode = center_mode
        self.mode = mode
        self.seg_num = 0

        # Always define these attributes so that drawing boxes without a
        # point cloud does not fail with AttributeError.
        self.pcd = None
        self.points_colors = None

        # draw points
        if points is not None:
            self.pcd, self.points_colors = _draw_points(
                points, self.o3d_visualizer, points_size, point_color, mode)

        # draw boxes
        if bbox3d is not None:
            _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,
                         self.pcd, bbox_color, points_in_box_color, rot_axis,
                         center_mode, mode)

    def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):
        """Add bounding box to visualizer.

        Args:
            bbox3d (numpy.array, shape=[M, 7]):
                3D bbox (x, y, z, x_size, y_size, z_size, yaw)
                to be visualized. The 3d bbox is in mode of
                Box3DMode.DEPTH with gravity_center (please refer to
                core.structures.box_3d_mode).
            bbox_color (tuple[float]): the color of bbox. Default: None,
                which falls back to the color given at construction.
            points_in_box_color (tuple[float]): the color of points which
                are in bbox3d. Default: None, which falls back to the color
                given at construction.
        """
        if bbox_color is None:
            bbox_color = self.bbox_color
        if points_in_box_color is None:
            points_in_box_color = self.points_in_box_color
        _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors, self.pcd,
                     bbox_color, points_in_box_color, self.rot_axis,
                     self.center_mode, self.mode)

    def add_seg_mask(self, seg_mask_colors):
        """Add segmentation mask to visualizer via per-point colorization.

        Reads ``self.pcd.points``, so a point cloud must have been supplied
        when the visualizer was constructed.

        Args:
            seg_mask_colors (numpy.array, shape=[N, 6]):
                The segmentation mask whose first 3 dims are point coordinates
                and last 3 dims are converted colors. The input is copied
                before the offset is applied.
        """
        # we can't draw the colors on existing points
        # in case gt and pred mask would overlap
        # instead we set a large offset along x-axis for each seg mask
        self.seg_num += 1
        cloud_xyz = np.array(self.pcd.points)
        offset = (cloud_xyz.max(0) -
                  cloud_xyz.min(0))[0] * 1.2 * self.seg_num
        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
            size=1, origin=[offset, 0, 0])  # create coordinate frame for seg
        self.o3d_visualizer.add_geometry(mesh_frame)
        seg_points = copy.deepcopy(seg_mask_colors)
        seg_points[:, 0] += offset
        _draw_points(
            seg_points, self.o3d_visualizer, self.points_size, mode='xyzrgb')

    def show(self, save_path=None):
        """Visualize the points cloud.

        Runs the Open3D window, optionally captures a screenshot after the
        run returns, and then destroys the window.

        Args:
            save_path (str, optional): path to save image. Default: None.
        """
        self.o3d_visualizer.run()

        if save_path is not None:
            self.o3d_visualizer.capture_screen_image(save_path)

        self.o3d_visualizer.destroy_window()
        return
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/visualizer/show_result.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
trimesh
from
.image_vis
import
(
draw_camera_bbox3d_on_img
,
draw_depth_bbox3d_on_img
,
draw_lidar_bbox3d_on_img
)
def
_write_obj
(
points
,
out_filename
):
"""Write points into ``obj`` format for meshlab visualization.
Args:
points (np.ndarray): Points in shape (N, dim).
out_filename (str): Filename to be saved.
"""
N
=
points
.
shape
[
0
]
fout
=
open
(
out_filename
,
'w'
)
for
i
in
range
(
N
):
if
points
.
shape
[
1
]
==
6
:
c
=
points
[
i
,
3
:].
astype
(
int
)
fout
.
write
(
'v %f %f %f %d %d %d
\n
'
%
(
points
[
i
,
0
],
points
[
i
,
1
],
points
[
i
,
2
],
c
[
0
],
c
[
1
],
c
[
2
]))
else
:
fout
.
write
(
'v %f %f %f
\n
'
%
(
points
[
i
,
0
],
points
[
i
,
1
],
points
[
i
,
2
]))
fout
.
close
()
def _write_oriented_bbox(scene_bbox, out_filename):
    """Export oriented (around Z axis) scene bbox to meshes.

    Args:
        scene_bbox(list[ndarray] or ndarray): xyz pos of center and
            3 lengths (x_size, y_size, z_size) and heading angle around Z axis.
            Y forward, X right, Z upward. heading angle of positive X is 0,
            heading angle of positive Y is 90 degrees. When empty, a single
            all-zero box is exported instead.
        out_filename(str): Filename.
    """

    def _rotation_about_z(angle):
        # 3x3 rotation matrix for a rotation of ``angle`` about the Z axis.
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)
        rot = np.zeros((3, 3))
        rot[2, 2] = 1
        rot[0:2, 0:2] = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
        return rot

    def _box_to_mesh(box):
        # 4x4 pose: rotation in the upper-left block, center as translation.
        pose = np.eye(4)
        pose[0:3, 3] = box[:3]
        pose[3, 3] = 1.0
        pose[0:3, 0:3] = _rotation_about_z(box[6])
        return trimesh.creation.box(box[3:6], pose)

    if len(scene_bbox) == 0:
        scene_bbox = np.zeros((1, 7))

    scene = trimesh.scene.Scene()
    for box in scene_bbox:
        scene.add_geometry(_box_to_mesh(box))

    merged_mesh = trimesh.util.concatenate(scene.dump())
    # save to obj file
    trimesh.io.export.export_mesh(merged_mesh, out_filename, file_type='obj')

    return
def show_result(points,
                gt_bboxes,
                pred_bboxes,
                out_dir,
                filename,
                show=False,
                snapshot=False,
                pred_labels=None):
    """Convert results into format that is directly readable for meshlab.

    Output files are written into the directory ``out_dir/filename``:
    ``{filename}_points.obj``, ``{filename}_gt.obj`` and
    ``{filename}_pred.obj``, each only when the matching input is not None.

    Note:
        ``gt_bboxes`` and ``pred_bboxes`` are modified in place: their z
        coordinate is shifted by half of column 5 before export.

    Args:
        points (np.ndarray): Points.
        gt_bboxes (np.ndarray): Ground truth boxes.
        pred_bboxes (np.ndarray): Predicted boxes.
        out_dir (str): Path of output directory
        filename (str): Filename of the current frame.
        show (bool, optional): Visualize the results online. Defaults to False.
        snapshot (bool, optional): Whether to save the online results.
            Defaults to False.
        pred_labels (np.ndarray, optional): Predicted labels of boxes. Any
            indexable sequence whose items convert with ``int()`` is
            accepted. Defaults to None.
    """
    result_path = osp.join(out_dir, filename)
    mmcv.mkdir_or_exist(result_path)

    if show:
        # Imported lazily so open3d is only needed for online display.
        from .open3d_vis import Visualizer

        vis = Visualizer(points)
        if pred_bboxes is not None:
            if pred_labels is None:
                vis.add_bboxes(bbox3d=pred_bboxes)
            elif len(pred_labels) > 0:
                # One random colour per label id. Empty label sets are
                # skipped because ``max()`` of an empty array raises.
                num_colors = int(pred_labels.max()) + 1
                palette = np.random.randint(
                    0, 255, size=(num_colors, 3)) / 256
                boxes_by_label = {}
                for j, label in enumerate(pred_labels):
                    # ``int(label)`` works for numpy scalars as well as for
                    # 0-dim tensors, whereas ``label.numpy()`` fails on the
                    # documented np.ndarray input.
                    boxes_by_label.setdefault(int(label),
                                              []).append(pred_bboxes[j])
                for label, boxes in boxes_by_label.items():
                    vis.add_bboxes(
                        bbox3d=np.array(boxes),
                        bbox_color=palette[label],
                        points_in_box_color=palette[label])

        if gt_bboxes is not None:
            vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))
        show_path = osp.join(result_path,
                             f'{filename}_online.png') if snapshot else None
        vis.show(show_path)

    if points is not None:
        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))

    if gt_bboxes is not None:
        # bottom center to gravity center
        gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2

        _write_oriented_bbox(gt_bboxes,
                             osp.join(result_path, f'{filename}_gt.obj'))

    if pred_bboxes is not None:
        # bottom center to gravity center
        pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2

        _write_oriented_bbox(pred_bboxes,
                             osp.join(result_path, f'{filename}_pred.obj'))
def show_seg_result(points,
                    gt_seg,
                    pred_seg,
                    out_dir,
                    filename,
                    palette,
                    ignore_index=None,
                    show=False,
                    snapshot=False):
    """Convert results into format that is directly readable for meshlab.

    Output files are written into the directory ``out_dir/filename``:
    ``{filename}_points.obj``, ``{filename}_gt.obj`` and
    ``{filename}_pred.obj``, each only when the matching input is not None.

    Args:
        points (np.ndarray): Points.
        gt_seg (np.ndarray): Ground truth segmentation mask.
        pred_seg (np.ndarray): Predicted segmentation mask.
        out_dir (str): Path of output directory
        filename (str): Filename of the current frame.
        palette (np.ndarray): Mapping between class labels and colors.
        ignore_index (int, optional): The label index to be ignored, e.g.
            unannotated points. Defaults to None.
        show (bool, optional): Visualize the results online. Defaults to False.
        snapshot (bool, optional): Whether to save the online results.
            Defaults to False.
    """
    has_gt = gt_seg is not None
    has_pred = pred_seg is not None

    # A mask can only be drawn on top of 3D coordinates.
    if has_gt or has_pred:
        assert points is not None, \
            '3D coordinates are required for segmentation visualization'

    # Drop every point whose ground-truth label is the ignored index. The
    # same selection is applied to points, predictions and ground truth.
    if has_gt and ignore_index is not None:
        keep = gt_seg != ignore_index
        if points is not None:
            points = points[keep]
        if has_pred:
            pred_seg = pred_seg[keep]
        gt_seg = gt_seg[keep]

    # Pair the first three point columns with the palette colour of each
    # label.
    if has_gt:
        gt_seg_color = np.concatenate([points[:, :3], palette[gt_seg]],
                                      axis=1)
    if has_pred:
        pred_seg_color = np.concatenate([points[:, :3], palette[pred_seg]],
                                        axis=1)

    result_path = osp.join(out_dir, filename)
    mmcv.mkdir_or_exist(result_path)

    # online visualization of segmentation mask
    # we show three masks in a row, scene_points, gt_mask, pred_mask
    if show:
        from .open3d_vis import Visualizer

        if points.shape[1] == 6:
            mode = 'xyzrgb'
        else:
            mode = 'xyz'
        vis = Visualizer(points, mode=mode)
        if has_gt:
            vis.add_seg_mask(gt_seg_color)
        if has_pred:
            vis.add_seg_mask(pred_seg_color)
        show_path = None
        if snapshot:
            show_path = osp.join(result_path, f'{filename}_online.png')
        vis.show(show_path)

    if points is not None:
        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))

    if has_gt:
        _write_obj(gt_seg_color, osp.join(result_path, f'{filename}_gt.obj'))

    if has_pred:
        _write_obj(pred_seg_color,
                   osp.join(result_path, f'{filename}_pred.obj'))
def show_multi_modality_result(img,
                               gt_bboxes,
                               pred_bboxes,
                               proj_mat,
                               out_dir,
                               filename,
                               box_mode='lidar',
                               img_metas=None,
                               show=False,
                               gt_bbox_color=(61, 102, 255),
                               pred_bbox_color=(241, 101, 72)):
    """Convert multi-modality detection results into 2D results.

    Project the predicted 3D bbox to 2D image plane and visualize them.
    Output images are written into the directory ``out_dir/filename``:
    ``{filename}_img.png``, ``{filename}_gt.png`` and
    ``{filename}_pred.png``, each only when the matching input is not None.

    Args:
        img (np.ndarray): The numpy array of image in cv2 fashion.
        gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
        pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.
        proj_mat (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
        out_dir (str): Path of output directory.
        filename (str): Filename of the current frame.
        box_mode (str, optional): Coordinate system the boxes are in.
            Should be one of 'depth', 'lidar' and 'camera'.
            Defaults to 'lidar'.
        img_metas (dict, optional): Used in projecting depth bbox.
            Defaults to None.
        show (bool, optional): Visualize the results online. Defaults to False.
        gt_bbox_color (str or tuple(int), optional): Color of bbox lines.
            Defaults to (61, 102, 255).
        pred_bbox_color (str or tuple(int), optional): Color of bbox lines.
            Defaults to (241, 101, 72).

    Raises:
        NotImplementedError: If ``box_mode`` is not one of the three
            supported values.

    NOTE(review): the earlier docstring described the colour tuples as BGR
    with defaults (255, 102, 61) and (72, 101, 241), i.e. the signature
    defaults reversed. Confirm which channel order the drawing helpers
    expect.
    """
    # Reject unknown modes before anything touches the filesystem.
    if box_mode not in ('depth', 'lidar', 'camera'):
        raise NotImplementedError(f'unsupported box mode {box_mode}')

    if box_mode == 'depth':
        draw_bbox = draw_depth_bbox3d_on_img
    elif box_mode == 'lidar':
        draw_bbox = draw_lidar_bbox3d_on_img
    else:
        draw_bbox = draw_camera_bbox3d_on_img

    result_path = osp.join(out_dir, filename)
    mmcv.mkdir_or_exist(result_path)

    # (boxes, line colour, file tag), ground truth first, then predictions.
    overlays = ((gt_bboxes, gt_bbox_color, 'gt'),
                (pred_bboxes, pred_bbox_color, 'pred'))

    if show:
        # Both box sets are drawn onto one copy for the on-screen preview.
        show_img = img.copy()
        for boxes, color, _ in overlays:
            if boxes is not None:
                show_img = draw_bbox(
                    boxes, show_img, proj_mat, img_metas, color=color)
        mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)

    if img is not None:
        mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))

    # Each box set is additionally drawn on its own and saved separately.
    for boxes, color, tag in overlays:
        if boxes is not None:
            drawn = draw_bbox(boxes, img, proj_mat, img_metas, color=color)
            mmcv.imwrite(drawn,
                         osp.join(result_path, f'{filename}_{tag}.png'))
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/__init__.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
from
.builder
import
build_voxel_generator
from
.voxel_generator
import
VoxelGenerator
__all__
=
[
'build_voxel_generator'
,
'VoxelGenerator'
]
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/builder.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
from
.
import
voxel_generator
def build_voxel_generator(cfg, **kwargs):
    """Builder of voxel generator.

    Args:
        cfg (VoxelGenerator | dict): An existing generator, which is
            returned unchanged, or a config dict that is passed to
            ``mmcv.runner.obj_from_dict`` together with the
            ``voxel_generator`` module.
        **kwargs: Passed to ``obj_from_dict`` as ``default_args``.

    Returns:
        The given generator, or the object built from ``cfg``.

    Raises:
        TypeError: If ``cfg`` is neither a ``VoxelGenerator`` nor a dict.
    """
    if isinstance(cfg, voxel_generator.VoxelGenerator):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(
            cfg, voxel_generator, default_args=kwargs)
    else:
        # The message previously said "sampler", which named the wrong
        # kind of object for this builder.
        raise TypeError(
            'Invalid type {} for building a voxel generator'.format(
                type(cfg)))
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/core/voxel/voxel_generator.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
numba
import
numpy
as
np
class VoxelGenerator(object):
    """Voxel generator in numpy implementation.

    Args:
        voxel_size (list[float]): Size of a single voxel
        point_cloud_range (list[float]): Range of points
        max_num_points (int): Maximum number of points in a single voxel
        max_voxels (int, optional): Maximum number of voxels.
            Defaults to 20000.
    """

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000):
        # Both inputs are stored as float32 arrays,
        # e.g. range [0, -40, -3, 70.4, 40, 1].
        self._point_cloud_range = np.array(point_cloud_range,
                                           dtype=np.float32)
        self._voxel_size = np.array(voxel_size, dtype=np.float32)

        # Number of voxels along each axis: extent / voxel size, rounded.
        extent = self._point_cloud_range[3:] - self._point_cloud_range[:3]
        self._grid_size = np.round(extent / self._voxel_size).astype(
            np.int64)

        self._max_num_points = max_num_points
        self._max_voxels = max_voxels

    def generate(self, points):
        """Generate voxels given points."""
        return points_to_voxel(
            points,
            self._voxel_size,
            self._point_cloud_range,
            max_points=self._max_num_points,
            reverse_index=True,
            max_voxels=self._max_voxels)

    @property
    def voxel_size(self):
        """np.ndarray: Size of a single voxel."""
        return self._voxel_size

    @property
    def max_num_points_per_voxel(self):
        """int: Maximum number of points per voxel."""
        return self._max_num_points

    @property
    def point_cloud_range(self):
        """np.ndarray: Range of point cloud."""
        return self._point_cloud_range

    @property
    def grid_size(self):
        """np.ndarray: The size of grids."""
        return self._grid_size

    def __repr__(self):
        """str: Return a string that describes the module."""
        name = self.__class__.__name__
        # Continuation lines are aligned just past the opening parenthesis.
        separator = ',\n' + ' ' * (len(name) + 1)
        fields = [
            f'voxel_size={self._voxel_size}',
            f'point_cloud_range={self._point_cloud_range.tolist()}',
            f'max_num_points={self._max_num_points}',
            f'max_voxels={self._max_voxels}',
            f'grid_size={self._grid_size.tolist()}',
        ]
        return name + '(' + separator.join(fields) + ')'
def points_to_voxel(points,
                    voxel_size,
                    coors_range,
                    max_points=35,
                    reverse_index=True,
                    max_voxels=20000):
    """convert kitti points(N, >=3) to voxels.

    Args:
        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
            points[:, 3:] contain other information such as reflectivity.
        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size
        coors_range (list[float | tuple[float] | ndarray]): Voxel range.
            format: xyzxyz, minmax
        max_points (int): Indicate maximum points contained in a voxel.
        reverse_index (bool): Whether return reversed coordinates.
            if points has xyz format and reverse_index is True, output
            coordinates will be zyx format, but points in features always
            xyz format.
        max_voxels (int): Maximum number of voxels this function creates.
            For second, 20000 is a good choice. Points should be shuffled for
            randomness before this function because max_voxels drops points.

    Returns:
        tuple[np.ndarray]:
            voxels: [M, max_points, ndim] float tensor. only contain points.
            coordinates: [M, 3] int32 tensor.
            num_points_per_voxel: [M] int32 tensor.
    """
    # Sequences are converted to arrays of the points' dtype; arrays that
    # are passed in are used as they are.
    if not isinstance(voxel_size, np.ndarray):
        voxel_size = np.array(voxel_size, dtype=points.dtype)
    if not isinstance(coors_range, np.ndarray):
        coors_range = np.array(coors_range, dtype=points.dtype)

    cells = np.round((coors_range[3:] - coors_range[:3]) / voxel_size)
    voxelmap_shape = tuple(cells.astype(np.int32).tolist())
    if reverse_index:
        voxelmap_shape = voxelmap_shape[::-1]

    # don't create large array in jit(nopython=True) code.
    # All output buffers are allocated here at their maximum size and
    # filled in place by the kernel.
    num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)
    # -1 marks a grid cell that has no voxel assigned yet.
    coor_to_voxelidx = np.full(voxelmap_shape, -1, dtype=np.int32)
    voxels = np.zeros(
        shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)

    kernel = (_points_to_voxel_reverse_kernel
              if reverse_index else _points_to_voxel_kernel)
    voxel_num = kernel(points, voxel_size, coors_range, num_points_per_voxel,
                       coor_to_voxelidx, voxels, coors, max_points,
                       max_voxels)

    # Only the first ``voxel_num`` entries of each buffer were filled.
    return (voxels[:voxel_num], coors[:voxel_num],
            num_points_per_voxel[:voxel_num])
@numba.jit(nopython=True)
def _points_to_voxel_reverse_kernel(points,
                                    voxel_size,
                                    coors_range,
                                    num_points_per_voxel,
                                    coor_to_voxelidx,
                                    voxels,
                                    coors,
                                    max_points=35,
                                    max_voxels=20000):
    """convert kitti points(N, >=3) to voxels.

    The output buffers are filled in place. Voxel coordinates are stored in
    reversed axis order relative to the first three point columns.

    Args:
        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
            points[:, 3:] contain other information such as reflectivity.
        voxel_size (np.ndarray): [3] xyz, indicate voxel size
        coors_range (np.ndarray): Range of voxels.
            format: xyzxyz, minmax
        num_points_per_voxel (np.ndarray): Per-voxel point counters,
            indexed by voxel index and incremented in place.
        coor_to_voxelidx (np.ndarray): A voxel grid of shape (D, H, W),
            which has the same shape as the complete voxel map. It indicates
            the index of each corresponding voxel; -1 means no voxel has
            been created for that cell yet.
        voxels (np.ndarray): Created empty voxels.
        coors (np.ndarray): Created coordinates of each voxel.
        max_points (int): Indicate maximum points contained in a voxel.
        max_voxels (int): Maximum number of voxels this function create.
            for second, 20000 is a good choice. Points should be shuffled for
            randomness before this function because max_voxels drops points.

    Returns:
        int: Number of voxels created. ``voxels``, ``coors`` and
            ``num_points_per_voxel`` are valid for indices below this value.
    """
    # put all computations to one loop.
    # we shouldn't create large array in main jit code, otherwise
    # reduce performance
    N = points.shape[0]
    # ndim = points.shape[1] - 1
    ndim = 3
    ndim_minus_1 = ndim - 1
    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
    # np.round(grid_size)
    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)
    # Round to zero decimals, writing the result back into ``grid_size``.
    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
    # Scratch buffer for the grid cell of the current point.
    coor = np.zeros(shape=(3, ), dtype=np.int32)
    voxel_num = 0
    failed = False
    for i in range(N):
        failed = False
        for j in range(ndim):
            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
            # Points outside the grid on any axis are skipped entirely.
            if c < 0 or c >= grid_size[j]:
                failed = True
                break
            # Axis j is written to slot (2 - j), reversing the axis order.
            coor[ndim_minus_1 - j] = c
        if failed:
            continue
        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
        if voxelidx == -1:
            # First point in this cell: allocate the next voxel slot.
            voxelidx = voxel_num
            # Once the budget is used up no new voxel is created, but later
            # points can still land in voxels that already exist.
            if voxel_num >= max_voxels:
                continue
            voxel_num += 1
            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
            coors[voxelidx] = coor
        num = num_points_per_voxel[voxelidx]
        # Points beyond ``max_points`` in a voxel are dropped.
        if num < max_points:
            voxels[voxelidx, num] = points[i]
            num_points_per_voxel[voxelidx] += 1
    return voxel_num
@numba.jit(nopython=True)
def _points_to_voxel_kernel(points,
                            voxel_size,
                            coors_range,
                            num_points_per_voxel,
                            coor_to_voxelidx,
                            voxels,
                            coors,
                            max_points=35,
                            max_voxels=20000):
    """convert kitti points(N, >=3) to voxels.

    The output buffers are filled in place. Voxel coordinates are stored in
    the same axis order as the first three point columns.

    Args:
        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
            points[:, 3:] contain other information such as reflectivity.
        voxel_size (np.ndarray): [3] xyz, indicate voxel size.
        coors_range (np.ndarray): Range of voxels.
            format: xyzxyz, minmax
        num_points_per_voxel (np.ndarray): Per-voxel point counters,
            indexed by voxel index and incremented in place.
        coor_to_voxelidx (np.ndarray): A voxel grid of shape (D, H, W),
            which has the same shape as the complete voxel map. It indicates
            the index of each corresponding voxel; -1 means no voxel has
            been created for that cell yet.
        voxels (np.ndarray): Created empty voxels.
        coors (np.ndarray): Created coordinates of each voxel.
        max_points (int): Indicate maximum points contained in a voxel.
        max_voxels (int): Maximum number of voxels this function create.
            for second, 20000 is a good choice. Points should be shuffled for
            randomness before this function because max_voxels drops points.

    Returns:
        int: Number of voxels created. ``voxels``, ``coors`` and
            ``num_points_per_voxel`` are valid for indices below this value.
    """
    N = points.shape[0]
    # ndim = points.shape[1] - 1
    ndim = 3
    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)
    # Round to zero decimals, writing the result back into ``grid_size``.
    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)

    # lower_bound = coors_range[:3]
    # upper_bound = coors_range[3:]
    # Scratch buffer for the grid cell of the current point.
    coor = np.zeros(shape=(3, ), dtype=np.int32)
    voxel_num = 0
    failed = False
    for i in range(N):
        failed = False
        for j in range(ndim):
            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
            # Points outside the grid on any axis are skipped entirely.
            if c < 0 or c >= grid_size[j]:
                failed = True
                break
            coor[j] = c
        if failed:
            continue
        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
        if voxelidx == -1:
            # First point in this cell: allocate the next voxel slot.
            voxelidx = voxel_num
            # Once the budget is used up no new voxel is created, but later
            # points can still land in voxels that already exist.
            if voxel_num >= max_voxels:
                continue
            voxel_num += 1
            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
            coors[voxelidx] = coor
        num = num_points_per_voxel[voxelidx]
        # Points beyond ``max_points`` in a voxel are dropped.
        if num < max_points:
            voxels[voxelidx, num] = points[i]
            num_points_per_voxel[voxelidx] += 1
    return voxel_num
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/__init__.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
from
mmdet.datasets.builder
import
build_dataloader
from
.builder
import
DATASETS
,
PIPELINES
,
build_dataset
from
.custom_3d
import
Custom3DDataset
from
.custom_3d_seg
import
Custom3DSegDataset
from
.kitti_dataset
import
KittiDataset
from
.kitti_mono_dataset
import
KittiMonoDataset
from
.lyft_dataset
import
LyftDataset
from
.nuscenes_dataset
import
NuScenesDataset
from
.nuscenes_mono_dataset
import
NuScenesMonoDataset
# yapf: disable
from
.pipelines
import
(
AffineResize
,
BackgroundPointsFilter
,
GlobalAlignment
,
GlobalRotScaleTrans
,
IndoorPatchPointSample
,
IndoorPointSample
,
LoadAnnotations3D
,
LoadPointsFromDict
,
LoadPointsFromFile
,
LoadPointsFromMultiSweeps
,
MultiViewWrapper
,
NormalizePointsColor
,
ObjectNameFilter
,
ObjectNoise
,
ObjectRangeFilter
,
ObjectSample
,
PointSample
,
PointShuffle
,
PointsRangeFilter
,
RandomDropPointsColor
,
RandomFlip3D
,
RandomJitterPoints
,
RandomRotate
,
RandomShiftScale
,
RangeLimitedRandomCrop
,
VoxelBasedPointSampler
)
# yapf: enable
from
.s3dis_dataset
import
S3DISDataset
,
S3DISSegDataset
from
.scannet_dataset
import
(
ScanNetDataset
,
ScanNetInstanceSegDataset
,
ScanNetSegDataset
)
from
.semantickitti_dataset
import
SemanticKITTIDataset
from
.sunrgbd_dataset
import
SUNRGBDDataset
from
.utils
import
get_loading_pipeline
from
.waymo_dataset
import
WaymoDataset
__all__
=
[
'KittiDataset'
,
'KittiMonoDataset'
,
'build_dataloader'
,
'DATASETS'
,
'build_dataset'
,
'NuScenesDataset'
,
'NuScenesMonoDataset'
,
'LyftDataset'
,
'ObjectSample'
,
'RandomFlip3D'
,
'ObjectNoise'
,
'GlobalRotScaleTrans'
,
'PointShuffle'
,
'ObjectRangeFilter'
,
'PointsRangeFilter'
,
'LoadPointsFromFile'
,
'S3DISSegDataset'
,
'S3DISDataset'
,
'NormalizePointsColor'
,
'IndoorPatchPointSample'
,
'IndoorPointSample'
,
'PointSample'
,
'LoadAnnotations3D'
,
'GlobalAlignment'
,
'SUNRGBDDataset'
,
'ScanNetDataset'
,
'ScanNetSegDataset'
,
'ScanNetInstanceSegDataset'
,
'SemanticKITTIDataset'
,
'Custom3DDataset'
,
'Custom3DSegDataset'
,
'LoadPointsFromMultiSweeps'
,
'WaymoDataset'
,
'BackgroundPointsFilter'
,
'VoxelBasedPointSampler'
,
'get_loading_pipeline'
,
'RandomDropPointsColor'
,
'RandomJitterPoints'
,
'ObjectNameFilter'
,
'AffineResize'
,
'RandomShiftScale'
,
'LoadPointsFromDict'
,
'PIPELINES'
,
'RangeLimitedRandomCrop'
,
'RandomRotate'
,
'MultiViewWrapper'
]
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/builder.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
platform
from
mmcv.utils
import
Registry
,
build_from_cfg
from
mmdet.datasets
import
DATASETS
as
MMDET_DATASETS
from
mmdet.datasets.builder
import
_concat_dataset
# Import-time side effect: on every platform except Windows, raise the soft
# limit on open file descriptors to at least 4096, never above the hard
# limit. The hard limit itself is left unchanged.
if platform.system() != 'Windows':
    # https://github.com/pytorch/pytorch/issues/973
    import resource
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    base_soft_limit = rlimit[0]
    hard_limit = rlimit[1]
    # Keep the current soft limit if it is already above 4096.
    soft_limit = min(max(4096, base_soft_limit), hard_limit)
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))

# Registries created by this module. ``build_dataset`` looks dataset types
# up in DATASETS first and falls back to the mmdet registry.
OBJECTSAMPLERS = Registry('Object sampler')
DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
def build_dataset(cfg, default_args=None):
    """Build a dataset, or a wrapper around datasets, from a config.

    Args:
        cfg (dict | list | tuple): Dataset config. A list or tuple of
            configs is built element by element and concatenated. A dict
            is dispatched on ``cfg['type']``: the wrapper types
            'ConcatDataset', 'RepeatDataset', 'ClassBalancedDataset' and
            'CBGSDataset' build their inner dataset(s) recursively.
        default_args (dict, optional): Passed through to every nested
            build call. Defaults to None.

    Returns:
        The constructed dataset or dataset wrapper.
    """
    # Imported inside the function, as in the original module layout.
    from mmdet3d.datasets.dataset_wrappers import CBGSDataset
    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
                                                 ConcatDataset, RepeatDataset)

    if isinstance(cfg, (list, tuple)):
        return ConcatDataset([build_dataset(c, default_args) for c in cfg])

    cfg_type = cfg['type']

    if cfg_type == 'ConcatDataset':
        members = [build_dataset(c, default_args) for c in cfg['datasets']]
        return ConcatDataset(members, cfg.get('separate_eval', True))

    if cfg_type == 'RepeatDataset':
        inner = build_dataset(cfg['dataset'], default_args)
        return RepeatDataset(inner, cfg['times'])

    if cfg_type == 'ClassBalancedDataset':
        inner = build_dataset(cfg['dataset'], default_args)
        return ClassBalancedDataset(inner, cfg['oversample_thr'])

    if cfg_type == 'CBGSDataset':
        return CBGSDataset(build_dataset(cfg['dataset'], default_args))

    # A list or tuple of annotation files is handled by mmdet's helper.
    if isinstance(cfg.get('ann_file'), (list, tuple)):
        return _concat_dataset(cfg, default_args)

    # Types registered in this package's registry take precedence; anything
    # else is looked up in the mmdet registry.
    if cfg_type in DATASETS._module_dict.keys():
        registry = DATASETS
    else:
        registry = MMDET_DATASETS
    return build_from_cfg(cfg, registry, default_args)
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
tempfile
import
warnings
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
from
torch.utils.data
import
Dataset
from
..core.bbox
import
get_box_type
from
.builder
import
DATASETS
from
.pipelines
import
Compose
from
.utils
import
extract_result_dict
,
get_loading_pipeline
@
DATASETS
.
register_module
()
class
Custom3DDataset
(
Dataset
):
"""Customized 3D dataset.
This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
dataset.
.. code-block:: none
[
{'sample_idx':
'lidar_points': {'lidar_path': velodyne_path,
....
},
'annos': {'box_type_3d': (str) 'LiDAR/Camera/Depth'
'gt_bboxes_3d': <np.ndarray> (n, 7)
'gt_names': [list]
....
}
'calib': { .....}
'images': { .....}
}
]
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
def __init__(self,
             data_root,
             ann_file,
             pipeline=None,
             classes=None,
             modality=None,
             box_type_3d='LiDAR',
             filter_empty_gt=True,
             test_mode=False,
             file_client_args=dict(backend='disk')):
    """Store the configuration, load annotations and build the pipeline.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        pipeline (list[dict], optional): Pipeline used for data processing.
            ``self.pipeline`` is only set when this is not None.
            Defaults to None.
        classes (tuple[str], optional): Classes used in the dataset.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to None.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Defaults to 'LiDAR'.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
        file_client_args (dict, optional): Keyword arguments for
            ``mmcv.FileClient``. The default dict is only unpacked, never
            modified. Defaults to dict(backend='disk').
    """
    super().__init__()
    self.data_root = data_root
    self.ann_file = ann_file
    self.test_mode = test_mode
    self.modality = modality
    self.filter_empty_gt = filter_empty_gt
    self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)

    self.CLASSES = self.get_classes(classes)
    self.file_client = mmcv.FileClient(**file_client_args)
    self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}

    # load annotations
    if hasattr(self.file_client, 'get_local_path'):
        with self.file_client.get_local_path(self.ann_file) as local_path:
            # Open the file in a ``with`` block so the handle is closed
            # once the annotations are loaded; it was previously leaked.
            with open(local_path, 'rb') as ann_stream:
                self.data_infos = self.load_annotations(ann_stream)
    else:
        warnings.warn(
            'The used MMCV version does not have get_local_path. '
            f'We treat the {self.ann_file} as local paths and it '
            'might cause errors if the path is not a local path. '
            'Please use MMCV>= 1.3.16 if you meet errors.')
        self.data_infos = self.load_annotations(self.ann_file)

    # process pipeline
    if pipeline is not None:
        self.pipeline = Compose(pipeline)

    # set group flag for the samplers
    if not self.test_mode:
        self._set_group_flag()
def load_annotations(self, ann_file):
    """Load annotations from ann_file.

    Args:
        ann_file (str | file-like): Path of the annotation file, or an
            already opened binary file object (``__init__`` passes one
            when the file client provides ``get_local_path``).

    Returns:
        list[dict]: List of annotations.
    """
    # loading data from a file-like object needs file format
    # NOTE(review): 'pkl' is presumably pickle-based; only load annotation
    # files from trusted sources.
    annotations = mmcv.load(ann_file, file_format='pkl')
    return annotations
def get_data_info(self, index):
    """Get data info according to the given index.

    Args:
        index (int): Index of the sample data to get.

    Returns:
        dict | None: Data information that will be passed to the data
            preprocessing pipelines. It includes the following keys:

            - sample_idx (str): Sample index.
            - pts_filename (str): Filename of point clouds.
            - file_name (str): Filename of point clouds.
            - ann_info (dict): Annotation info, only outside test mode.

            None is returned outside test mode when ``filter_empty_gt`` is
            set and no label differs from -1.
    """
    record = self.data_infos[index]
    sample_idx = record['sample_idx']
    lidar_file = osp.join(self.data_root,
                          record['lidar_points']['lidar_path'])

    # The point cloud path is exposed under two keys.
    input_dict = {
        'pts_filename': lidar_file,
        'sample_idx': sample_idx,
        'file_name': lidar_file,
    }

    if self.test_mode:
        return input_dict

    annos = self.get_ann_info(index)
    input_dict['ann_info'] = annos
    # A sample whose labels are all -1 has no usable ground truth.
    if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():
        return None
    return input_dict
def get_ann_info(self, index):
    """Get annotation info according to the given index.

    Args:
        index (int): Index of the annotation data to get.

    Returns:
        dict: Annotation information consists of the following keys:

            - gt_bboxes_3d: 3D ground truth bboxes, converted to
                ``self.box_mode_3d``.
            - gt_labels_3d (np.ndarray): Labels of ground truths; -1 for
                names that are not in ``self.CLASSES``.
            - gt_names (list[str]): Class names of ground truths.
    """
    annos = self.data_infos[index]['annos']
    raw_boxes = annos['gt_bboxes_3d']
    gt_names_3d = annos['gt_names']

    # Map each name to its position in CLASSES; unknown names become -1.
    gt_labels_3d = np.array([
        self.CLASSES.index(cat) if cat in self.CLASSES else -1
        for cat in gt_names_3d
    ])

    # Obtain original box 3d type in info file
    source_box_cls, _ = get_box_type(annos['box_type_3d'])
    # turn original box type to target box type
    source_boxes = source_box_cls(
        raw_boxes, box_dim=raw_boxes.shape[-1], origin=(0.5, 0.5, 0.5))
    gt_bboxes_3d = source_boxes.convert_to(self.box_mode_3d)

    return dict(
        gt_bboxes_3d=gt_bboxes_3d,
        gt_labels_3d=gt_labels_3d,
        gt_names=gt_names_3d)
def pre_pipeline(self, results):
    """Initialization before data preparation.

    The dict is updated in place; nothing is returned.

    Args:
        results (dict): Dict before data preprocessing. The following
            keys are set (existing values are overwritten):

            - img_fields (list): Image fields.
            - bbox3d_fields (list): 3D bounding boxes fields.
            - pts_mask_fields (list): Mask fields of points.
            - pts_seg_fields (list): Mask fields of point segments.
            - bbox_fields (list): Fields of bounding boxes.
            - mask_fields (list): Fields of masks.
            - seg_fields (list): Segment fields.
            - box_type_3d: Copied from ``self.box_type_3d``.
            - box_mode_3d: Copied from ``self.box_mode_3d``.
    """
    list_fields = ('img_fields', 'bbox3d_fields', 'pts_mask_fields',
                   'pts_seg_fields', 'bbox_fields', 'mask_fields',
                   'seg_fields')
    # Every field gets its own fresh empty list.
    for field in list_fields:
        results[field] = []
    results['box_type_3d'] = self.box_type_3d
    results['box_mode_3d'] = self.box_mode_3d
def prepare_train_data(self, index):
    """Training data preparation.

    Args:
        index (int): Index for accessing the target data.

    Returns:
        dict | None: Training data dict of the corresponding index. None
            is returned when ``get_data_info`` yields None, or when
            ``filter_empty_gt`` is set and the pipeline output is None or
            has no label different from -1.
    """
    input_dict = self.get_data_info(index)
    if input_dict is None:
        return None

    self.pre_pipeline(input_dict)
    example = self.pipeline(input_dict)

    if self.filter_empty_gt:
        if example is None:
            return None
        # Labels are read from the ``_data`` attribute of the pipeline's
        # 'gt_labels_3d' entry.
        if ~(example['gt_labels_3d']._data != -1).any():
            return None
    return example
def prepare_test_data(self, index):
    """Prepare data for testing.

    Args:
        index (int): Index for accessing the target data.

    Returns:
        dict: Testing data dict of the corresponding index, i.e. the
            output of ``self.pipeline``.
    """
    input_dict = self.get_data_info(index)
    # ``pre_pipeline`` fills in the bookkeeping fields in place.
    self.pre_pipeline(input_dict)
    return self.pipeline(input_dict)
@classmethod
def get_classes(cls, classes=None):
    """Resolve the class names to use for this dataset.

    Args:
        classes (Sequence[str] | str): If ``None``, the ``CLASSES``
            attribute of the dataset class is used. A string is treated
            as a file path and read with ``mmcv.list_from_file``. A tuple
            or list is returned unchanged and overrides ``CLASSES``.

    Returns:
        list[str]: A list of class names.

    Raises:
        ValueError: If ``classes`` is none of the supported types.
    """
    if classes is None:
        return cls.CLASSES
    if isinstance(classes, str):
        # take it as a file path
        return mmcv.list_from_file(classes)
    if isinstance(classes, (tuple, list)):
        return classes
    raise ValueError(f'Unsupported type {type(classes)} of classes.')
def format_results(self,
                   outputs,
                   pklfile_prefix=None,
                   submission_prefix=None):
    """Dump the results to a pkl file.

    Args:
        outputs (list[dict]): Testing results of the dataset.
        pklfile_prefix (str, optional): The prefix of the pkl file. It
            includes the file path and the prefix of filename, e.g.,
            "a/b/prefix". If not specified, a temporary directory is
            created and the file is written there. Default: None.
        submission_prefix (str, optional): Accepted for interface
            compatibility; not used by this implementation.
            Default: None.

    Returns:
        tuple: (outputs, tmp_dir). ``outputs`` is the input returned
            unchanged. ``tmp_dir`` is the ``tempfile.TemporaryDirectory``
            created when ``pklfile_prefix`` is not specified, otherwise
            ``None``.
    """
    # Bound up front: previously a caller-supplied prefix left ``tmp_dir``
    # undefined and the return statement raised UnboundLocalError.
    tmp_dir = None
    if pklfile_prefix is None:
        tmp_dir = tempfile.TemporaryDirectory()
        pklfile_prefix = osp.join(tmp_dir.name, 'results')
    # The output path is needed on both branches.
    out = f'{pklfile_prefix}.pkl'
    mmcv.dump(outputs, out)
    return outputs, tmp_dir
def evaluate(self,
             results,
             metric=None,
             iou_thr=(0.25, 0.5),
             logger=None,
             show=False,
             out_dir=None,
             pipeline=None):
    """Evaluate the results with ``indoor_eval``.

    Args:
        results (list[dict]): List of results, one per entry of
            ``self.data_infos``.
        metric (str | list[str], optional): Metrics to be evaluated.
            Not read by this implementation. Defaults to None.
        iou_thr (list[float]): AP IoU thresholds.
            Defaults to (0.25, 0.5).
        logger (logging.Logger | str, optional): Logger used for printing
            related information during evaluation. Defaults to None.
        show (bool, optional): Whether to visualize.
            Default: False.
        out_dir (str, optional): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict: Evaluation results.
    """
    from mmdet3d.core.evaluation import indoor_eval

    # sanity checks on the prediction container
    assert isinstance(results, list), \
        f'Expect results to be list, got {type(results)}.'
    assert len(results) > 0, 'Expect length of results > 0.'
    assert len(results) == len(self.data_infos)
    first_result = results[0]
    assert isinstance(first_result, dict), \
        f'Expect elements in results to be dict, got {type(first_result)}.'

    annos_per_sample = [
        sample_info['annos'] for sample_info in self.data_infos
    ]
    # label index -> class name
    label_to_name = dict(enumerate(self.CLASSES))
    metrics = indoor_eval(
        annos_per_sample,
        results,
        iou_thr,
        label_to_name,
        logger=logger,
        box_type_3d=self.box_type_3d,
        box_mode_3d=self.box_mode_3d)
    if show:
        self.show(results, out_dir, pipeline=pipeline)
    return metrics
def _build_default_pipeline(self):
    """Build the default pipeline for this dataset.

    Raises:
        NotImplementedError: Always; the message names the concrete
            class of ``self``.
    """
    dataset_name = self.__class__.__name__
    message = ('_build_default_pipeline is not implemented '
               f'for dataset {dataset_name}')
    raise NotImplementedError(message)
def _get_pipeline(self, pipeline):
    """Get data loading pipeline in self.show/evaluate function.

    Args:
        pipeline (list[dict]): Input pipeline. If None is given, the
            loading steps are taken from ``self.pipeline``; if that is
            missing or None too, a warning is issued and
            ``self._build_default_pipeline()`` is used.

    Returns:
        A ``Compose`` of the chosen steps, or whatever
        ``self._build_default_pipeline()`` returns on the fallback path.
    """
    if pipeline is not None:
        return Compose(pipeline)

    own_pipeline = getattr(self, 'pipeline', None)
    if own_pipeline is None:
        warnings.warn(
            'Use default pipeline for data loading, this may cause '
            'errors when data is on ceph')
        return self._build_default_pipeline()

    loading_steps = get_loading_pipeline(own_pipeline.transforms)
    return Compose(loading_steps)
def _extract_data(self, index, pipeline, key, load_annos=False):
    """Load data using input pipeline and extract data according to key.

    Args:
        index (int): Index for accessing the target data.
        pipeline (:obj:`Compose`): Composed data loading pipeline.
        key (str | list[str]): One single or a list of data key.
        load_annos (bool): Whether to load data annotations.
            If True, ``self.test_mode`` is temporarily set to False
            while the sample is loaded and restored afterwards, even if
            loading fails.

    Returns:
        np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
            A single or a list of loaded data.
    """
    assert pipeline is not None, 'data loading pipeline is not provided'
    # when we want to load ground-truth via pipeline (e.g. bbox, seg mask)
    # we need to set self.test_mode as False so that we have 'annos'
    if load_annos:
        original_test_mode = self.test_mode
        self.test_mode = False
    try:
        input_dict = self.get_data_info(index)
        self.pre_pipeline(input_dict)
        example = pipeline(input_dict)

        # extract data items according to keys
        if isinstance(key, str):
            data = extract_result_dict(example, key)
        else:
            data = [extract_result_dict(example, k) for k in key]
    finally:
        # Restore the flag on the error path as well; otherwise one
        # failing sample would leave the dataset stuck in training mode.
        if load_annos:
            self.test_mode = original_test_mode

    return data
def __len__(self):
    """Return the number of entries in ``self.data_infos``.

    Returns:
        int: Length of data infos.
    """
    infos = self.data_infos
    return len(infos)
def _rand_another(self, idx):
    """Randomly pick an index whose flag equals the flag of ``idx``.

    The pick may be ``idx`` itself.

    Args:
        idx (int): Index whose flag selects the candidate group.

    Returns:
        int: Another index of item with the same flag.
    """
    same_flag = self.flag == self.flag[idx]
    candidates = np.where(same_flag)[0]
    return np.random.choice(candidates)
def __getitem__(self, idx):
    """Get item from infos according to the given index.

    In test mode the sample is prepared once and returned as is. In
    training mode, a sample for which ``prepare_train_data`` returns
    ``None`` is replaced by a randomly chosen index with the same flag
    until a sample is obtained.

    Args:
        idx (int): Index of the requested sample.

    Returns:
        dict: Data dictionary of the corresponding index.
    """
    if self.test_mode:
        return self.prepare_test_data(idx)

    data = self.prepare_train_data(idx)
    while data is None:
        idx = self._rand_another(idx)
        data = self.prepare_train_data(idx)
    return data
def _set_group_flag(self):
    """Set flag according to image aspect ratio.

    Images with aspect ratio greater than 1 will be set as group 1,
    otherwise group 0. In 3D datasets, they are all the same, thus are all
    zeros: ``self.flag`` becomes a ``uint8`` array of ``len(self)`` zeros.
    """
    num_samples = len(self)
    self.flag = np.zeros(num_samples, dtype=np.uint8)
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/custom_3d_seg.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
tempfile
import
warnings
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
from
torch.utils.data
import
Dataset
from
mmseg.datasets
import
DATASETS
as
SEG_DATASETS
from
.builder
import
DATASETS
from
.pipelines
import
Compose
from
.utils
import
extract_result_dict
,
get_loading_pipeline
@DATASETS.register_module()
@SEG_DATASETS.register_module()
class Custom3DSegDataset(Dataset):
    """Customized 3D dataset for semantic segmentation task.

    This is the base dataset of ScanNet and S3DIS dataset.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        classes (tuple[str], optional): Classes used in the dataset.
            Defaults to None.
        palette (list[list[int]], optional): The palette of segmentation map.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to None.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
        ignore_index (int, optional): The label index to be ignored, e.g.
            unannotated points. If None is given, set to len(self.CLASSES) to
            be consistent with PointSegClassMapping function in pipeline.
            Defaults to None.
        scene_idxs (np.ndarray | str, optional): Precomputed index to load
            data. For scenes with many points, we may sample it several times.
            Defaults to None.
        file_client_args (dict, optional): Keyword arguments passed to
            ``mmcv.FileClient``. Defaults to dict(backend='disk').
    """
    # names of all classes data used for the task
    CLASSES = None

    # class_ids used for training
    VALID_CLASS_IDS = None

    # all possible class_ids in loaded segmentation mask
    ALL_CLASS_IDS = None

    # official color for visualization
    PALETTE = None

    def __init__(self,
                 data_root,
                 ann_file,
                 pipeline=None,
                 classes=None,
                 palette=None,
                 modality=None,
                 test_mode=False,
                 ignore_index=None,
                 scene_idxs=None,
                 file_client_args=dict(backend='disk')):
        super().__init__()
        self.data_root = data_root
        self.ann_file = ann_file
        self.test_mode = test_mode
        self.modality = modality
        self.file_client = mmcv.FileClient(**file_client_args)

        # load annotations
        if hasattr(self.file_client, 'get_local_path'):
            with self.file_client.get_local_path(self.ann_file) as local_path:
                # Close the handle once the annotations are read; it used
                # to be opened inline and never closed.
                with open(local_path, 'rb') as ann_fp:
                    self.data_infos = self.load_annotations(ann_fp)
        else:
            warnings.warn(
                'The used MMCV version does not have get_local_path. '
                f'We treat the {self.ann_file} as local paths and it '
                'might cause errors if the path is not a local path. '
                'Please use MMCV>= 1.3.16 if you meet errors.')
            self.data_infos = self.load_annotations(self.ann_file)

        # ``self.pipeline`` is only set when a pipeline is given
        if pipeline is not None:
            self.pipeline = Compose(pipeline)

        self.ignore_index = len(self.CLASSES) if \
            ignore_index is None else ignore_index

        self.scene_idxs = self.get_scene_idxs(scene_idxs)
        self.CLASSES, self.PALETTE = \
            self.get_classes_and_palette(classes, palette)

        # set group flag for the sampler
        if not self.test_mode:
            self._set_group_flag()

    def load_annotations(self, ann_file):
        """Load annotations from ann_file.

        Args:
            ann_file (str | file-like): Path of the annotation file, or an
                already opened binary file object.

        Returns:
            list[dict]: List of annotations.
        """
        # loading data from a file-like object needs file format
        return mmcv.load(ann_file, file_format='pkl')

    def get_data_info(self, index):
        """Get data info according to the given index.

        Args:
            index (int): Index of the sample data to get.

        Returns:
            dict: Data information that will be passed to the data
                preprocessing pipelines. It includes the following keys:

                - sample_idx (str): Sample index.
                - pts_filename (str): Filename of point clouds.
                - file_name (str): Filename of point clouds.
                - ann_info (dict): Annotation info, obtained from
                    ``self.get_ann_info`` and only present when not in
                    test mode.
        """
        info = self.data_infos[index]
        sample_idx = info['point_cloud']['lidar_idx']
        pts_filename = osp.join(self.data_root, info['pts_path'])

        input_dict = dict(
            pts_filename=pts_filename,
            sample_idx=sample_idx,
            file_name=pts_filename)

        if not self.test_mode:
            annos = self.get_ann_info(index)
            input_dict['ann_info'] = annos
        return input_dict

    def pre_pipeline(self, results):
        """Initialization before data preparation.

        Args:
            results (dict): Dict before data preprocessing. It is updated
                in place with empty lists under these keys:

                - img_fields (list): Image fields.
                - pts_mask_fields (list): Mask fields of points.
                - pts_seg_fields (list): Mask fields of point segments.
                - mask_fields (list): Fields of masks.
                - seg_fields (list): Segment fields.
                - bbox3d_fields (list): 3D bounding boxes fields.
        """
        results['img_fields'] = []
        results['pts_mask_fields'] = []
        results['pts_seg_fields'] = []
        results['mask_fields'] = []
        results['seg_fields'] = []
        results['bbox3d_fields'] = []

    def prepare_train_data(self, index):
        """Training data preparation.

        Args:
            index (int): Index for accessing the target data.

        Returns:
            dict: Training data dict of the corresponding index, or None
                when ``get_data_info`` returns None.
        """
        input_dict = self.get_data_info(index)
        if input_dict is None:
            return None
        self.pre_pipeline(input_dict)
        example = self.pipeline(input_dict)
        return example

    def prepare_test_data(self, index):
        """Prepare data for testing.

        Args:
            index (int): Index for accessing the target data.

        Returns:
            dict: Testing data dict of the corresponding index.
        """
        input_dict = self.get_data_info(index)
        self.pre_pipeline(input_dict)
        example = self.pipeline(input_dict)
        return example

    def get_classes_and_palette(self, classes=None, palette=None):
        """Get class names of current dataset.

        This function is taken from MMSegmentation. Besides returning the
        names and palette it sets ``self.custom_classes``,
        ``self.label_map`` and ``self.label2cat``.

        Args:
            classes (Sequence[str] | str): If classes is None, use
                default CLASSES defined by builtin dataset. If classes is a
                string, take it as a file name. The file contains the name of
                classes where each line contains one class name. If classes is
                a tuple or list, override the CLASSES defined by the dataset.
                Defaults to None.
            palette (Sequence[Sequence[int]]] | np.ndarray):
                The palette of segmentation map. Defaults to None.
                NOTE(review): the passed value is never read; the returned
                palette is always derived from ``self.PALETTE``.

        Returns:
            tuple: (class_names, palette).

        Raises:
            ValueError: If ``classes`` has an unsupported type or is not a
                subset of ``self.CLASSES``.
        """
        if classes is None:
            self.custom_classes = False
            # map id in the loaded mask to label used for training
            self.label_map = {
                cls_id: self.ignore_index
                for cls_id in self.ALL_CLASS_IDS
            }
            self.label_map.update(
                {cls_id: i
                 for i, cls_id in enumerate(self.VALID_CLASS_IDS)})
            # map label to category name
            self.label2cat = {
                i: cat_name
                for i, cat_name in enumerate(self.CLASSES)
            }
            return self.CLASSES, self.PALETTE

        self.custom_classes = True
        if isinstance(classes, str):
            # take it as a file path
            class_names = mmcv.list_from_file(classes)
        elif isinstance(classes, (tuple, list)):
            class_names = classes
        else:
            raise ValueError(f'Unsupported type {type(classes)} of classes.')

        if self.CLASSES:
            if not set(class_names).issubset(self.CLASSES):
                raise ValueError('classes is not a subset of CLASSES.')

            # update valid_class_ids
            self.VALID_CLASS_IDS = [
                self.VALID_CLASS_IDS[self.CLASSES.index(cls_name)]
                for cls_name in class_names
            ]

            # dictionary, its keys are the old label ids and its values
            # are the new label ids.
            # used for changing pixel labels in load_annotations.
            self.label_map = {
                cls_id: self.ignore_index
                for cls_id in self.ALL_CLASS_IDS
            }
            self.label_map.update(
                {cls_id: i
                 for i, cls_id in enumerate(self.VALID_CLASS_IDS)})
            self.label2cat = {
                i: cat_name
                for i, cat_name in enumerate(class_names)
            }

        # modify palette for visualization
        palette = [
            self.PALETTE[self.CLASSES.index(cls_name)]
            for cls_name in class_names
        ]

        return class_names, palette

    def get_scene_idxs(self, scene_idxs):
        """Compute scene_idxs for data sampling.

        We sample more times for scenes with more points.

        Args:
            scene_idxs (np.ndarray | str | None): Precomputed indices, a
                path loaded with ``np.load``, or None to use every entry
                of ``self.data_infos`` once. Ignored in test mode.

        Returns:
            np.ndarray: Scene indices as ``int32``.
        """
        if self.test_mode:
            # when testing, we load one whole scene every time
            return np.arange(len(self.data_infos)).astype(np.int32)

        # we may need to re-sample different scenes according to scene_idxs
        # this is necessary for indoor scene segmentation such as ScanNet
        if scene_idxs is None:
            scene_idxs = np.arange(len(self.data_infos))
        if isinstance(scene_idxs, str):
            with self.file_client.get_local_path(scene_idxs) as local_path:
                scene_idxs = np.load(local_path)
        else:
            scene_idxs = np.array(scene_idxs)

        return scene_idxs.astype(np.int32)

    def format_results(self,
                       outputs,
                       pklfile_prefix=None,
                       submission_prefix=None):
        """Dump the results to a pkl file.

        Args:
            outputs (list[dict]): Testing results of the dataset.
            pklfile_prefix (str, optional): The prefix of the pkl file. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temporary directory is
                created and the file is written there. Default: None.
            submission_prefix (str, optional): Accepted for interface
                compatibility; not used by this implementation.
                Default: None.

        Returns:
            tuple: (outputs, tmp_dir). ``outputs`` is the input returned
                unchanged. ``tmp_dir`` is the
                ``tempfile.TemporaryDirectory`` created when
                ``pklfile_prefix`` is not specified, otherwise ``None``.
        """
        # Bound up front: previously a caller-supplied prefix left
        # ``tmp_dir`` undefined and the return raised UnboundLocalError.
        tmp_dir = None
        if pklfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            pklfile_prefix = osp.join(tmp_dir.name, 'results')
        # The output path is needed on both branches.
        out = f'{pklfile_prefix}.pkl'
        mmcv.dump(outputs, out)
        return outputs, tmp_dir

    def evaluate(self,
                 results,
                 metric=None,
                 logger=None,
                 show=False,
                 out_dir=None,
                 pipeline=None):
        """Evaluate.

        Evaluation in semantic segmentation protocol.

        Args:
            results (list[dict]): List of results.
            metric (str | list[str]): Metrics to be evaluated.
                Not read by this implementation.
            logger (logging.Logger | str, optional): Logger used for printing
                related information during evaluation. Defaults to None.
            show (bool, optional): Whether to visualize.
                Defaults to False.
            out_dir (str, optional): Path to save the visualization results.
                Defaults to None.
            pipeline (list[dict], optional): raw data loading for showing.
                Default: None.

        Returns:
            dict: Evaluation results.
        """
        from mmdet3d.core.evaluation import seg_eval
        assert isinstance(
            results, list), f'Expect results to be list, got {type(results)}.'
        assert len(results) > 0, 'Expect length of results > 0.'
        assert len(results) == len(self.data_infos)
        assert isinstance(
            results[0], dict
        ), f'Expect elements in results to be dict, got {type(results[0])}.'

        load_pipeline = self._get_pipeline(pipeline)
        pred_sem_masks = [result['semantic_mask'] for result in results]
        # ground truth is re-loaded through the loading pipeline
        gt_sem_masks = [
            self._extract_data(
                i, load_pipeline, 'pts_semantic_mask', load_annos=True)
            for i in range(len(self.data_infos))
        ]
        ret_dict = seg_eval(
            gt_sem_masks,
            pred_sem_masks,
            self.label2cat,
            self.ignore_index,
            logger=logger)

        if show:
            self.show(pred_sem_masks, out_dir, pipeline=pipeline)

        return ret_dict

    def _rand_another(self, idx):
        """Randomly get another item with the same flag.

        Args:
            idx (int): Index whose flag selects the candidate group.

        Returns:
            int: Another index of item with the same flag.
        """
        pool = np.where(self.flag == self.flag[idx])[0]
        return np.random.choice(pool)

    def _build_default_pipeline(self):
        """Build the default pipeline for this dataset.

        Raises:
            NotImplementedError: Always; subclasses are expected to
                override this method.
        """
        raise NotImplementedError('_build_default_pipeline is not implemented '
                                  f'for dataset {self.__class__.__name__}')

    def _get_pipeline(self, pipeline):
        """Get data loading pipeline in self.show/evaluate function.

        Args:
            pipeline (list[dict]): Input pipeline. If None is given,
                get from self.pipeline.

        Returns:
            A ``Compose`` of the chosen steps, or the result of
            ``self._build_default_pipeline()`` when neither ``pipeline``
            nor ``self.pipeline`` is available.
        """
        if pipeline is None:
            if not hasattr(self, 'pipeline') or self.pipeline is None:
                warnings.warn(
                    'Use default pipeline for data loading, this may cause '
                    'errors when data is on ceph')
                return self._build_default_pipeline()
            loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
            return Compose(loading_pipeline)
        return Compose(pipeline)

    def _extract_data(self, index, pipeline, key, load_annos=False):
        """Load data using input pipeline and extract data according to key.

        Args:
            index (int): Index for accessing the target data.
            pipeline (:obj:`Compose`): Composed data loading pipeline.
            key (str | list[str]): One single or a list of data key.
            load_annos (bool): Whether to load data annotations.
                If True, ``self.test_mode`` is temporarily set to False
                while the sample is loaded and restored afterwards, even
                if loading fails.

        Returns:
            np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
                A single or a list of loaded data.
        """
        assert pipeline is not None, 'data loading pipeline is not provided'
        # when we want to load ground-truth via pipeline (e.g. bbox, seg mask)
        # we need to set self.test_mode as False so that we have 'annos'
        if load_annos:
            original_test_mode = self.test_mode
            self.test_mode = False
        try:
            input_dict = self.get_data_info(index)
            self.pre_pipeline(input_dict)
            example = pipeline(input_dict)

            # extract data items according to keys
            if isinstance(key, str):
                data = extract_result_dict(example, key)
            else:
                data = [extract_result_dict(example, k) for k in key]
        finally:
            # Restore the flag on the error path as well; otherwise one
            # failing sample would leave the dataset in training mode.
            if load_annos:
                self.test_mode = original_test_mode

        return data

    def __len__(self):
        """Return the length of scene_idxs.

        Returns:
            int: Length of ``self.scene_idxs``.
        """
        return len(self.scene_idxs)

    def __getitem__(self, idx):
        """Get item from infos according to the given index.

        In indoor scene segmentation task, each scene contains millions of
        points. However, we only sample less than 10k points within a patch
        each time. Therefore, we use `scene_idxs` to re-sample different rooms.

        Args:
            idx (int): Position in ``self.scene_idxs``.

        Returns:
            dict: Data dictionary of the corresponding index.
        """
        scene_idx = self.scene_idxs[idx]  # map to scene idx
        if self.test_mode:
            return self.prepare_test_data(scene_idx)
        while True:
            data = self.prepare_train_data(scene_idx)
            if data is None:
                idx = self._rand_another(idx)
                scene_idx = self.scene_idxs[idx]  # map to scene idx
                continue
            return data

    def _set_group_flag(self):
        """Set flag according to image aspect ratio.

        Images with aspect ratio greater than 1 will be set as group 1,
        otherwise group 0. In 3D datasets, they are all the same, thus are all
        zeros.
        """
        self.flag = np.zeros(len(self), dtype=np.uint8)
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
from
.builder
import
DATASETS
@DATASETS.register_module()
class CBGSDataset(object):
    """A wrapper of class sampled dataset with ann_file path. Implementation of
    paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object
    Detection <https://arxiv.org/abs/1908.09492.>`_.

    Balance the number of scenes under different classes.

    Args:
        dataset (:obj:`CustomDataset`): The dataset to be class sampled.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.CLASSES = dataset.CLASSES
        # class name -> integer category id
        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
        self.sample_indices = self._get_sample_indices()
        # self.dataset.data_infos = self.data_infos
        if hasattr(self.dataset, 'flag'):
            # carry the wrapped dataset's flags over to the resampled order
            self.flag = np.array(
                [self.dataset.flag[ind] for ind in self.sample_indices],
                dtype=np.uint8)

    def _get_sample_indices(self):
        """Draw class-balanced sample indices from the wrapped dataset.

        Each index of the wrapped dataset is grouped under every category
        id returned by ``self.dataset.get_cat_ids``. Every non-empty class
        is then resampled with ``np.random.choice`` (with replacement).
        Classes without any sample are skipped with a warning; they used
        to trigger a ZeroDivisionError.

        Returns:
            list[int]: Indices into ``self.dataset`` after class sampling.
        """
        class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()}
        for idx in range(len(self.dataset)):
            sample_cat_ids = self.dataset.get_cat_ids(idx)
            for cat_id in sample_cat_ids:
                class_sample_idxs[cat_id].append(idx)

        # a sample is counted once per class it contains
        duplicated_samples = sum(
            len(cls_inds) for cls_inds in class_sample_idxs.values())

        sample_indices = []
        frac = 1.0 / len(self.CLASSES)
        for cat_id, cls_inds in class_sample_idxs.items():
            if not cls_inds:
                # Nothing to draw from, and the class share would be zero.
                import warnings
                warnings.warn(
                    f'CBGSDataset: category id {cat_id} has no samples '
                    'and is skipped during class-balanced sampling.')
                continue
            # same arithmetic as before so sampled counts are unchanged
            ratio = frac / (len(cls_inds) / duplicated_samples)
            sample_indices += np.random.choice(
                cls_inds, int(len(cls_inds) * ratio)).tolist()
        return sample_indices

    def __getitem__(self, idx):
        """Get item from infos according to the given index.

        Args:
            idx (int): Position in the resampled index list.

        Returns:
            dict: Data dictionary of the corresponding index.
        """
        ori_idx = self.sample_indices[idx]
        return self.dataset[ori_idx]

    def __len__(self):
        """Return the number of resampled indices.

        Returns:
            int: Length of ``self.sample_indices``.
        """
        return len(self.sample_indices)
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti2d_dataset.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
numpy
as
np
from
mmdet.datasets
import
CustomDataset
from
.builder
import
DATASETS
@DATASETS.register_module()
class Kitti2DDataset(CustomDataset):
    r"""KITTI 2D Dataset.

    This class serves as the API for experiments on the `KITTI Dataset
    <http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        classes (tuple[str], optional): Classes used in the dataset.
            Defaults to None.
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to None.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
            Defaults to 'LiDAR'. Available options includes

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor dataset.
            - 'Camera': Box in camera coordinates.
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
    """

    CLASSES = ('car', 'pedestrian', 'cyclist')

    # Annotation format:
    # [
    #     {
    #         'image': {
    #             'image_idx': 0,
    #             'image_path': 'training/image_2/000000.png',
    #             'image_shape': array([ 370, 1224], dtype=int32)
    #         },
    #         'point_cloud': {
    #              'num_features': 4,
    #              'velodyne_path': 'training/velodyne/000000.bin'
    #          },
    #          'calib': {
    #              'P0': <np.ndarray> (4, 4),
    #              'P1': <np.ndarray> (4, 4),
    #              'P2': <np.ndarray> (4, 4),
    #              'P3': <np.ndarray> (4, 4),
    #              'R0_rect':4x4 np.array,
    #              'Tr_velo_to_cam': 4x4 np.array,
    #              'Tr_imu_to_velo': 4x4 np.array
    #          },
    #          'annos': {
    #              'name': <np.ndarray> (n),
    #              'truncated': <np.ndarray> (n),
    #              'occluded': <np.ndarray> (n),
    #              'alpha': <np.ndarray> (n),
    #              'bbox': <np.ndarray> (n, 4),
    #              'dimensions': <np.ndarray> (n, 3),
    #              'location': <np.ndarray> (n, 3),
    #              'rotation_y': <np.ndarray> (n),
    #              'score': <np.ndarray> (n),
    #              'index': array([0], dtype=int32),
    #              'group_ids': array([0], dtype=int32),
    #              'difficulty': array([0], dtype=int32),
    #              'num_points_in_gt': <np.ndarray> (n),
    #          }
    #     }
    # ]

    def load_annotations(self, ann_file):
        """Read the annotation file and build the name-to-label mapping.

        Sets ``self.data_infos`` and ``self.cat2label``.

        Args:
            ann_file (str): Path of the annotation file.

        Returns:
            list[dict]: List of annotations.
        """
        self.data_infos = mmcv.load(ann_file)
        self.cat2label = {
            name: label
            for label, name in enumerate(self.CLASSES)
        }
        return self.data_infos

    def _filter_imgs(self, min_size=32):
        """Return the indices of entries that have at least one annotation.

        ``min_size`` is accepted but not used here.
        """
        return [
            position for position, entry in enumerate(self.data_infos)
            if len(entry['annos']['name']) > 0
        ]

    def get_ann_info(self, index):
        """Get annotation info according to the given index.

        Args:
            index (int): Index of the annotation data to get.

        Returns:
            dict: Annotation information consists of the following keys:

                - bboxes (np.ndarray): Ground truth bboxes.
                - labels (np.ndarray): Labels of ground truths.
        """
        # Use index to get the annos, thus the evalhook could also use this api
        annos = self.data_infos[index]['annos']
        names = annos['name']
        boxes = annos['bbox']
        difficulty = annos['difficulty']

        # keep only the classes of interest
        keep = self.keep_arrays_by_name(names, self.CLASSES)
        boxes = boxes[keep]
        names = names[keep]
        # filtered like the other arrays, though not part of the result
        difficulty = difficulty[keep]

        labels = np.array([self.cat2label[name] for name in names])
        return dict(bboxes=boxes.astype(np.float32), labels=labels)

    def prepare_train_img(self, idx):
        """Training image preparation.

        Args:
            idx (int): Index for accessing the target image data.

        Returns:
            dict: Training image data dict after preprocessing
                corresponding to the index, or None when the sample has
                no ground truth box left.
        """
        raw_image_info = self.data_infos[idx]['image']
        img_info = dict(filename=raw_image_info['image_path'])
        ann_info = self.get_ann_info(idx)
        if len(ann_info['bboxes']) == 0:
            return None

        results = dict(img_info=img_info, ann_info=ann_info)
        proposals = self.proposals
        if proposals is not None:
            results['proposals'] = proposals[idx]
        self.pre_pipeline(results)
        return self.pipeline(results)

    def prepare_test_img(self, idx):
        """Prepare data for testing.

        Args:
            idx (int): Index for accessing the target image data.

        Returns:
            dict: Testing image data dict after preprocessing
                corresponding to the index.
        """
        raw_image_info = self.data_infos[idx]['image']
        results = dict(img_info=dict(filename=raw_image_info['image_path']))
        proposals = self.proposals
        if proposals is not None:
            results['proposals'] = proposals[idx]
        self.pre_pipeline(results)
        return self.pipeline(results)

    def drop_arrays_by_name(self, gt_names, used_classes):
        """Drop irrelevant ground truths by name.

        Args:
            gt_names (list[str]): Names of ground truths.
            used_classes (list[str]): Classes of interest.

        Returns:
            np.ndarray: Indices of ground truths that will be dropped.
        """
        dropped = [
            position for position, name in enumerate(gt_names)
            if name not in used_classes
        ]
        return np.array(dropped, dtype=np.int64)

    def keep_arrays_by_name(self, gt_names, used_classes):
        """Keep useful ground truths by name.

        Args:
            gt_names (list[str]): Names of ground truths.
            used_classes (list[str]): Classes of interest.

        Returns:
            np.ndarray: Indices of ground truths that will be kept.
        """
        kept = [
            position for position, name in enumerate(gt_names)
            if name in used_classes
        ]
        return np.array(kept, dtype=np.int64)

    def reformat_bbox(self, outputs, out=None):
        """Reformat bounding boxes to KITTI 2D styles.

        Args:
            outputs (list[np.ndarray]): List of arrays storing the inferenced
                bounding boxes and scores.
            out (str, optional): The prefix of output file.
                Default: None.

        Returns:
            list[dict]: A list of dictionaries with the kitti 2D format.
        """
        from mmdet3d.core.bbox.transforms import bbox2result_kitti2d
        image_ids = [entry['image']['image_idx'] for entry in self.data_infos]
        return bbox2result_kitti2d(outputs, self.CLASSES, image_ids, out)

    def evaluate(self, result_files, eval_types=None):
        """Evaluation in KITTI protocol.

        Args:
            result_files (str): Path of result files.
            eval_types (str, optional): Types of evaluation. Default: None.
                KITTI dataset only support 'bbox' evaluation type.

        Returns:
            tuple (str, dict): Average precision results in str format
                and average precision results in dict format.
        """
        from mmdet3d.core.evaluation import kitti_eval
        eval_types = eval_types or ['bbox']
        assert eval_types in ('bbox', ['bbox']), \
            'KITTI data set only evaluate bbox'
        gt_annos = [entry['annos'] for entry in self.data_infos]
        ap_result_str, ap_dict = kitti_eval(
            gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
        return ap_result_str, ap_dict
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_dataset.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
os
import
tempfile
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
torch
from
mmcv.utils
import
print_log
from
..core
import
show_multi_modality_result
,
show_result
from
..core.bbox
import
(
Box3DMode
,
CameraInstance3DBoxes
,
Coord3DMode
,
LiDARInstance3DBoxes
,
points_cam2img
)
from
.builder
import
DATASETS
from
.custom_3d
import
Custom3DDataset
from
.pipelines
import
Compose
@
DATASETS
.
register_module
()
class
KittiDataset
(
Custom3DDataset
):
r
"""KITTI Dataset.
This class serves as the API for experiments on the `KITTI Dataset
<http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
split (str): Split of input data.
pts_prefix (str, optional): Prefix of points files.
Defaults to 'velodyne'.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [0, -40, -3, 70.4, 40, 0.0].
"""
CLASSES
=
(
'car'
,
'pedestrian'
,
'cyclist'
)
def __init__(self,
             data_root,
             ann_file,
             split,
             pts_prefix='velodyne',
             pipeline=None,
             classes=None,
             modality=None,
             box_type_3d='LiDAR',
             filter_empty_gt=True,
             test_mode=False,
             pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0],
             **kwargs):
    """Initialise the parent dataset, then store the KITTI-specific fields.

    ``split``, ``pts_prefix`` and ``pcd_limit_range`` are kept on the
    instance; every other argument, including ``**kwargs``, is forwarded
    to the parent constructor. ``self.modality`` must not be None after
    the parent constructor has run.
    """
    super().__init__(
        data_root=data_root,
        ann_file=ann_file,
        pipeline=pipeline,
        classes=classes,
        modality=modality,
        box_type_3d=box_type_3d,
        filter_empty_gt=filter_empty_gt,
        test_mode=test_mode,
        **kwargs)

    self.split = split
    # directory of the chosen split below the dataset root
    self.root_split = osp.join(self.data_root, split)
    assert self.modality is not None
    # NOTE: the default list is stored as is, so instances relying on the
    # default share one list object; treat it as read-only.
    self.pcd_limit_range = pcd_limit_range
    self.pts_prefix = pts_prefix
def _get_pts_filename(self, idx):
    """Get point cloud filename according to the given index.

    Args:
        idx (int): Index of the point cloud file to get. It is
            zero-padded to six digits in the file name.

    Returns:
        str: ``<root_split>/<pts_prefix>/<idx as 6 digits>.bin``.
    """
    basename = f'{idx:06d}.bin'
    return osp.join(self.root_split, self.pts_prefix, basename)
def get_data_info(self, index):
    """Get data info according to the given index.

    Args:
        index (int): Index of the sample data to get.

    Returns:
        dict: Data information that will be passed to the data
            preprocessing pipelines. It includes the following keys:

            - sample_idx (str): Sample index.
            - pts_filename (str): Filename of point clouds.
            - img_prefix (str): Prefix of image files (always None here).
            - img_info (dict): Image info.
            - lidar2img (np.ndarray): ``P2 @ R0_rect @ Tr_velo_to_cam``
                computed from the ``float32`` calibration matrices.
            - ann_info (dict): Annotation info, only present when not in
                test mode.
    """
    info = self.data_infos[index]
    image_meta = info['image']
    sample_idx = image_meta['image_idx']
    img_filename = os.path.join(self.data_root, image_meta['image_path'])

    # TODO: consider use torch.Tensor only
    calib = info['calib']
    rect = calib['R0_rect'].astype(np.float32)
    Trv2c = calib['Tr_velo_to_cam'].astype(np.float32)
    P2 = calib['P2'].astype(np.float32)
    lidar2img = P2 @ rect @ Trv2c

    input_dict = {
        'sample_idx': sample_idx,
        'pts_filename': self._get_pts_filename(sample_idx),
        'img_prefix': None,
        'img_info': {'filename': img_filename},
        'lidar2img': lidar2img,
    }

    if not self.test_mode:
        input_dict['ann_info'] = self.get_ann_info(index)
    return input_dict
def get_ann_info(self, index):
    """Get annotation info according to the given index.

    Args:
        index (int): Index of the annotation data to get.

    Returns:
        dict: Annotation information with the following keys:

            - gt_bboxes_3d: 3D ground truth boxes, built as
              :obj:`CameraInstance3DBoxes` and converted to
              ``self.box_mode_3d``.
            - gt_labels_3d (np.ndarray): Copy of ``labels``.
            - bboxes (np.ndarray): 2D ground truth boxes (float32).
            - labels (np.ndarray): int64 index of every name inside
              ``self.CLASSES``, -1 for names that are not listed.
            - gt_names (np.ndarray): Class names of ground truths.
            - plane (np.ndarray | None): 4-vector ground plane
              transformed with the inverse of ``R0_rect @ Tr_velo_to_cam``,
              or None when the info has no ``'plane'`` entry.
            - difficulty: ``info['annos']['difficulty']`` as stored, i.e.
              read before the 'DontCare' entries are removed.
    """
    # Use index to get the annos, thus the evalhook could also use this api
    info = self.data_infos[index]
    calib = info['calib']
    rect = calib['R0_rect'].astype(np.float32)
    Trv2c = calib['Tr_velo_to_cam'].astype(np.float32)
    # Inverse of the velodyne -> rectified-camera transform; shared by the
    # ground plane conversion and the box conversion below.
    cam2lidar = np.linalg.inv(rect @ Trv2c)

    plane_lidar = None
    if 'plane' in info:
        # convert ground plane to velodyne coordinates
        plane_cam = info['plane']
        normal_cam = plane_cam[:3]
        # A point lying on the plane: -normal * offset.
        point_cam = -plane_cam[:3] * plane_cam[3]
        rotation = cam2lidar[:3, :3]
        normal_lidar = (rotation @ normal_cam[:, None])[:, 0]
        point_lidar = rotation @ point_cam + cam2lidar[:3, 3]
        plane_lidar = np.zeros_like(normal_lidar, shape=(4, ))
        plane_lidar[:3] = normal_lidar
        plane_lidar[3] = -normal_lidar.T @ point_lidar

    raw_annos = info['annos']
    difficulty = raw_annos['difficulty']
    # we need other objects to avoid collision when sample
    annos = self.remove_dontcare(raw_annos)
    gt_names = annos['name']

    # Camera-frame boxes as (location, dimensions, rotation_y) rows.
    boxes_cam = np.concatenate(
        [
            annos['location'], annos['dimensions'],
            annos['rotation_y'][..., np.newaxis]
        ],
        axis=1).astype(np.float32)
    # convert gt_bboxes_3d to velodyne coordinates
    gt_bboxes_3d = CameraInstance3DBoxes(boxes_cam).convert_to(
        self.box_mode_3d, cam2lidar)

    selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
    gt_bboxes = annos['bbox'][selected].astype('float32')
    gt_names = gt_names[selected]

    gt_labels = np.array([
        self.CLASSES.index(cat) if cat in self.CLASSES else -1
        for cat in gt_names
    ]).astype(np.int64)

    return dict(
        gt_bboxes_3d=gt_bboxes_3d,
        gt_labels_3d=copy.deepcopy(gt_labels),
        bboxes=gt_bboxes,
        labels=gt_labels,
        gt_names=gt_names,
        plane=plane_lidar,
        difficulty=difficulty)
def drop_arrays_by_name(self, gt_names, used_classes):
    """Select the ground truths whose name is NOT in ``used_classes``.

    Args:
        gt_names (list[str]): Names of ground truths.
        used_classes (list[str]): Names to drop.

    Returns:
        np.ndarray: int64 indices of the entries that remain after the
            entries named in ``used_classes`` have been dropped.
    """
    remaining = []
    for position, name in enumerate(gt_names):
        if name in used_classes:
            continue
        remaining.append(position)
    return np.array(remaining, dtype=np.int64)
def keep_arrays_by_name(self, gt_names, used_classes):
    """Select the ground truths whose name is in ``used_classes``.

    Args:
        gt_names (list[str]): Names of ground truths.
        used_classes (list[str]): Classes of interest.

    Returns:
        np.ndarray: int64 indices of the ground truths that will be kept.
    """
    wanted = []
    for position, name in enumerate(gt_names):
        if name in used_classes:
            wanted.append(position)
    return np.array(wanted, dtype=np.int64)
def remove_dontcare(self, ann_info):
    """Remove annotations that do not need to be cared.

    Args:
        ann_info (dict): Dict of annotation infos. Every value is indexed
            with the positions whose ``ann_info['name']`` entry differs
            from ``'DontCare'``.

    Returns:
        dict: New dict with the same keys holding the filtered values;
            ``ann_info`` itself is left untouched.
    """
    keep = [
        position for position, name in enumerate(ann_info['name'])
        if name != 'DontCare'
    ]
    # Index with the position list so every field stays aligned.
    return {field: values[keep] for field, values in ann_info.items()}
def format_results(self,
                   outputs,
                   pklfile_prefix=None,
                   submission_prefix=None):
    """Format the results to pkl file.

    Args:
        outputs (list[dict]): Testing results of the dataset.
        pklfile_prefix (str): The prefix of pkl files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temporary directory is created and
            ``<tmp>/results`` is used. Default: None.
        submission_prefix (str): The prefix of submitted files. It
            includes the file path and the prefix of filename, e.g.,
            "a/b/prefix". It is handed to the converters unchanged
            (None is passed through). Default: None.

    Returns:
        tuple: (result_files, tmp_dir). ``result_files`` is the converter
            output, or a dict ``{name: converter output}`` when the
            outputs carry 'pts_bbox' / 'img_bbox' entries. ``tmp_dir`` is
            the :obj:`tempfile.TemporaryDirectory` created when
            ``pklfile_prefix`` is not specified, otherwise None.
    """
    if pklfile_prefix is None:
        tmp_dir = tempfile.TemporaryDirectory()
        pklfile_prefix = osp.join(tmp_dir.name, 'results')
    else:
        tmp_dir = None

    if not isinstance(outputs[0], dict):
        # Plain (non-dict) outputs are treated as 2D detections.
        result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
                                                pklfile_prefix,
                                                submission_prefix)
    elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:
        # One converted result per output head, keyed by the head name.
        result_files = dict()
        for name in outputs[0]:
            results_ = [out[name] for out in outputs]
            pklfile_prefix_ = pklfile_prefix + name
            if submission_prefix is not None:
                submission_prefix_ = submission_prefix + name
            else:
                submission_prefix_ = None
            if 'img' in name:
                # The converted list must go into ``result_files_``;
                # assigning it to ``result_files`` would replace the
                # accumulator dict and break the store below.
                result_files_ = self.bbox2result_kitti2d(
                    results_, self.CLASSES, pklfile_prefix_,
                    submission_prefix_)
            else:
                result_files_ = self.bbox2result_kitti(
                    results_, self.CLASSES, pklfile_prefix_,
                    submission_prefix_)
            result_files[name] = result_files_
    else:
        result_files = self.bbox2result_kitti(outputs, self.CLASSES,
                                              pklfile_prefix,
                                              submission_prefix)
    return result_files, tmp_dir
def evaluate(self,
             results,
             metric=None,
             logger=None,
             pklfile_prefix=None,
             submission_prefix=None,
             show=False,
             out_dir=None,
             pipeline=None):
    """Evaluation in KITTI protocol.

    Args:
        results (list[dict]): Testing results of the dataset.
        metric (str | list[str], optional): Metrics to be evaluated.
            Only the value 'img_bbox' is inspected here; it restricts the
            evaluation of non-dict result files to 'bbox'. Default: None.
        logger (logging.Logger | str, optional): Logger used for printing
            related information during evaluation. Default: None.
        pklfile_prefix (str, optional): The prefix of pkl files, including
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        submission_prefix (str, optional): The prefix of submission data.
            If not specified, the submission data will not be generated.
            Default: None.
        show (bool, optional): Whether to visualize.
            Default: False.
        out_dir (str, optional): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict[str, float]: Results of each evaluation metric.
    """
    # Forward ``submission_prefix`` as well; it used to be dropped here,
    # so the documented submission files were never written.
    result_files, tmp_dir = self.format_results(results, pklfile_prefix,
                                                submission_prefix)
    try:
        from mmdet3d.core.evaluation import kitti_eval
        gt_annos = [info['annos'] for info in self.data_infos]

        if isinstance(result_files, dict):
            # One evaluation per output head; keys become '<head>/<type>'.
            ap_dict = dict()
            for name, result_files_ in result_files.items():
                eval_types = ['bbox', 'bev', '3d']
                if 'img' in name:
                    eval_types = ['bbox']
                ap_result_str, ap_dict_ = kitti_eval(
                    gt_annos,
                    result_files_,
                    self.CLASSES,
                    eval_types=eval_types)
                for ap_type, ap in ap_dict_.items():
                    ap_dict[f'{name}/{ap_type}'] = float(
                        '{:.4f}'.format(ap))

                print_log(
                    f'Results of {name}:\n' + ap_result_str, logger=logger)
        else:
            if metric == 'img_bbox':
                ap_result_str, ap_dict = kitti_eval(
                    gt_annos,
                    result_files,
                    self.CLASSES,
                    eval_types=['bbox'])
            else:
                ap_result_str, ap_dict = kitti_eval(
                    gt_annos, result_files, self.CLASSES)
            print_log('\n' + ap_result_str, logger=logger)
    finally:
        # Remove the temporary result directory even if evaluation fails.
        if tmp_dir is not None:
            tmp_dir.cleanup()

    if show or out_dir:
        self.show(results, out_dir, show=show, pipeline=pipeline)
    return ap_dict
def bbox2result_kitti(self,
                      net_outputs,
                      class_names,
                      pklfile_prefix=None,
                      submission_prefix=None):
    """Convert 3D detection results to kitti format for evaluation and test
    submission.

    Args:
        net_outputs (list): Per-sample network outputs; each entry is
            handed to ``self.convert_valid_bboxes`` together with the
            matching data info.
        class_names (list[String]): A list of class names.
        pklfile_prefix (str): The prefix of pkl file. ``.pkl`` is appended
            unless it already ends with ``.pkl`` or ``.pickle``. Nothing
            is dumped when None.
        submission_prefix (str): Directory that receives one
            ``<sample_idx as 6 digits>.txt`` per sample. Nothing is
            written when None.

    Returns:
        list[dict]: A list of dictionaries with the kitti format.
    """
    assert len(net_outputs) == len(self.data_infos), \
        'invalid list length of network outputs'
    if submission_prefix is not None:
        mmcv.mkdir_or_exist(submission_prefix)

    det_annos = []
    print('\nConverting prediction to KITTI format')
    for idx, pred_dicts in enumerate(
            mmcv.track_iter_progress(net_outputs)):
        annos = []
        info = self.data_infos[idx]
        sample_idx = info['image']['image_idx']
        image_shape = info['image']['image_shape'][:2]
        box_dict = self.convert_valid_bboxes(pred_dicts, info)
        anno = {
            'name': [],
            'truncated': [],
            'occluded': [],
            'alpha': [],
            'bbox': [],
            'dimensions': [],
            'location': [],
            'rotation_y': [],
            'score': []
        }
        if len(box_dict['bbox']) > 0:
            box_2d_preds = box_dict['bbox']
            box_preds = box_dict['box3d_camera']
            scores = box_dict['scores']
            box_preds_lidar = box_dict['box3d_lidar']
            label_preds = box_dict['label_preds']

            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                # Clip the 2D box to the image: image_shape is reversed
                # so that it lines up with the (x, y) box corners.
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno['name'].append(class_names[int(label)])
                anno['truncated'].append(0.0)
                anno['occluded'].append(0)
                anno['alpha'].append(
                    -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
                anno['bbox'].append(bbox)
                anno['dimensions'].append(box[3:6])
                anno['location'].append(box[:3])
                anno['rotation_y'].append(box[6])
                anno['score'].append(score)

            anno = {k: np.stack(v) for k, v in anno.items()}
            annos.append(anno)
        else:
            # No valid box: keep an empty, correctly shaped record.
            anno = {
                'name': np.array([]),
                'truncated': np.array([]),
                'occluded': np.array([]),
                'alpha': np.array([]),
                'bbox': np.zeros([0, 4]),
                'dimensions': np.zeros([0, 3]),
                'location': np.zeros([0, 3]),
                'rotation_y': np.array([]),
                'score': np.array([]),
            }
            annos.append(anno)

        if submission_prefix is not None:
            curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
            with open(curr_file, 'w') as f:
                bbox = anno['bbox']
                loc = anno['location']
                dims = anno['dimensions']  # lhw -> hwl

                # Dedicated loop variable so the sample index ``idx`` of
                # the outer loop is not shadowed.
                for box_idx in range(len(bbox)):
                    print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
                            anno['name'][box_idx], anno['alpha'][box_idx],
                            bbox[box_idx][0], bbox[box_idx][1],
                            bbox[box_idx][2], bbox[box_idx][3],
                            dims[box_idx][1], dims[box_idx][2],
                            dims[box_idx][0], loc[box_idx][0],
                            loc[box_idx][1], loc[box_idx][2],
                            anno['rotation_y'][box_idx],
                            anno['score'][box_idx]),
                        file=f)

        annos[-1]['sample_idx'] = np.array(
            [sample_idx] * len(annos[-1]['score']), dtype=np.int64)

        det_annos += annos

    if pklfile_prefix is not None:
        # ``out`` must be bound on both paths; previously a prefix that
        # already carried a pickle extension left it undefined.
        if pklfile_prefix.endswith(('.pkl', '.pickle')):
            out = pklfile_prefix
        else:
            out = f'{pklfile_prefix}.pkl'
        mmcv.dump(det_annos, out)
        print(f'Result is saved to {out}.')

    return det_annos
def bbox2result_kitti2d(self,
                        net_outputs,
                        class_names,
                        pklfile_prefix=None,
                        submission_prefix=None):
    """Convert 2D detection results to kitti format for evaluation and test
    submission.

    Args:
        net_outputs (list): Per-sample results; every sample is a
            sequence indexed by label whose entries are arrays with one
            row per box (first four columns: box, fifth column: score).
        class_names (list[String]): A list of class names.
        pklfile_prefix (str): The prefix of pkl file. ``.pkl`` is appended
            unless it already ends with ``.pkl`` or ``.pickle``. Nothing
            is dumped when None.
        submission_prefix (str): Directory that receives one
            ``<sample_idx as 6 digits>.txt`` per sample. Nothing is
            written when None.

    Returns:
        list[dict]: A list of dictionaries have the kitti format
    """
    assert len(net_outputs) == len(self.data_infos), \
        'invalid list length of network outputs'
    det_annos = []
    print('\nConverting prediction to KITTI format')
    for i, bboxes_per_sample in enumerate(
            mmcv.track_iter_progress(net_outputs)):
        annos = []
        anno = dict(
            name=[],
            truncated=[],
            occluded=[],
            alpha=[],
            bbox=[],
            dimensions=[],
            location=[],
            rotation_y=[],
            score=[])
        sample_idx = self.data_infos[i]['image']['image_idx']

        num_example = 0
        for label in range(len(bboxes_per_sample)):
            bbox = bboxes_per_sample[label]
            # Dedicated row index so the sample index ``i`` of the outer
            # loop is not shadowed.
            for row in range(bbox.shape[0]):
                anno['name'].append(class_names[int(label)])
                anno['truncated'].append(0.0)
                anno['occluded'].append(0)
                anno['alpha'].append(0.0)
                anno['bbox'].append(bbox[row, :4])
                # set dimensions (height, width, length) to zero
                anno['dimensions'].append(
                    np.zeros(shape=[3], dtype=np.float32))
                # set the 3D translation to (-1000, -1000, -1000)
                anno['location'].append(
                    np.ones(shape=[3], dtype=np.float32) * (-1000.0))
                anno['rotation_y'].append(0.0)
                anno['score'].append(bbox[row, 4])
                num_example += 1

        if num_example == 0:
            # No detection: keep an empty, correctly shaped record.
            annos.append(
                dict(
                    name=np.array([]),
                    truncated=np.array([]),
                    occluded=np.array([]),
                    alpha=np.array([]),
                    bbox=np.zeros([0, 4]),
                    dimensions=np.zeros([0, 3]),
                    location=np.zeros([0, 3]),
                    rotation_y=np.array([]),
                    score=np.array([]),
                ))
        else:
            anno = {k: np.stack(v) for k, v in anno.items()}
            annos.append(anno)

        annos[-1]['sample_idx'] = np.array(
            [sample_idx] * num_example, dtype=np.int64)
        det_annos += annos

    if pklfile_prefix is not None:
        # save file in pkl format
        # Keep (or add) a pickle extension: the previous code cut four
        # characters off the prefix, which mangled '.pickle' names and
        # left no extension for mmcv.dump to pick a format from.
        if pklfile_prefix.endswith(('.pkl', '.pickle')):
            pklfile_path = pklfile_prefix
        else:
            pklfile_path = f'{pklfile_prefix}.pkl'
        mmcv.dump(det_annos, pklfile_path)

    if submission_prefix is not None:
        # save file in submission format
        mmcv.mkdir_or_exist(submission_prefix)
        print(f'Saving KITTI submission to {submission_prefix}')
        for i, anno in enumerate(det_annos):
            sample_idx = self.data_infos[i]['image']['image_idx']
            cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
            with open(cur_det_file, 'w') as f:
                bbox = anno['bbox']
                loc = anno['location']
                # lhw -> hwl: reverse the columns of every box; a plain
                # [::-1] would reverse the order of the boxes instead.
                dims = anno['dimensions'][:, ::-1]
                for idx in range(len(bbox)):
                    # '{:.4f}' (four decimals) matches the 3D converter;
                    # the former '{:4f}' only set a minimum width.
                    print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f}'.format(
                            anno['name'][idx],
                            anno['alpha'][idx],
                            *bbox[idx],  # 4 float
                            *dims[idx],  # 3 float
                            *loc[idx],  # 3 float
                            anno['rotation_y'][idx],
                            anno['score'][idx]),
                        file=f,
                    )
        print(f'Result is saved to {submission_prefix}')

    return det_annos
def convert_valid_bboxes(self, box_dict, info):
    """Convert the predicted boxes into valid ones.

    A box is kept when its projected 2D box overlaps the image and its
    center lies strictly inside ``self.pcd_limit_range``.

    Args:
        box_dict (dict): Box dictionaries to be converted.

            - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
            - scores_3d (torch.Tensor): Scores of boxes.
            - labels_3d (torch.Tensor): Class labels of boxes.
        info (dict): Data info.

    Returns:
        dict: Valid predicted boxes.

            - bbox (np.ndarray): 2D bounding boxes.
            - box3d_camera (np.ndarray): 3D bounding boxes in
              camera coordinate.
            - box3d_lidar (np.ndarray): 3D bounding boxes in
              LiDAR coordinate.
            - scores (np.ndarray): Scores of boxes.
            - label_preds (np.ndarray): Class label predictions.
            - sample_idx (int): Sample index.
    """
    # TODO: refactor this function
    sample_idx = info['image']['image_idx']

    def empty_result():
        # Result used whenever no box survives. NOTE(review): label_preds
        # is shaped (0, 4) here, unlike the 1-D labels of the non-empty
        # case; kept as in the original.
        return dict(
            bbox=np.zeros([0, 4]),
            box3d_camera=np.zeros([0, 7]),
            box3d_lidar=np.zeros([0, 7]),
            scores=np.zeros([0]),
            label_preds=np.zeros([0, 4]),
            sample_idx=sample_idx)

    boxes_lidar = box_dict['boxes_3d']
    scores = box_dict['scores_3d']
    labels = box_dict['labels_3d']
    boxes_lidar.limit_yaw(offset=0.5, period=np.pi * 2)

    if len(boxes_lidar) == 0:
        return empty_result()

    calib = info['calib']
    rect = calib['R0_rect'].astype(np.float32)
    Trv2c = calib['Tr_velo_to_cam'].astype(np.float32)
    # Tensors created through ``new_tensor`` share dtype/device with the
    # predicted boxes.
    proj_mat = boxes_lidar.tensor.new_tensor(
        calib['P2'].astype(np.float32))
    image_shape = boxes_lidar.tensor.new_tensor(
        info['image']['image_shape'])
    limit_range = boxes_lidar.tensor.new_tensor(self.pcd_limit_range)

    boxes_cam = boxes_lidar.convert_to(Box3DMode.CAM, rect @ Trv2c)

    # box_corners_in_image: [N, 8, 2]
    corners_in_image = points_cam2img(boxes_cam.corners, proj_mat)
    top_left = torch.min(corners_in_image, dim=1)[0]
    bottom_right = torch.max(corners_in_image, dim=1)[0]
    boxes_2d = torch.cat([top_left, bottom_right], dim=1)

    # Post-processing
    # check box_preds_camera: the projected box must touch the image.
    inside_image = ((boxes_2d[:, 0] < image_shape[1]) &
                    (boxes_2d[:, 1] < image_shape[0]) &
                    (boxes_2d[:, 2] > 0) & (boxes_2d[:, 3] > 0))
    # check box_preds: the center must lie inside the point cloud range.
    centers = boxes_lidar.center
    inside_range = ((centers > limit_range[:3]) &
                    (centers < limit_range[3:])).all(-1)
    valid_inds = inside_image & inside_range

    if not valid_inds.sum() > 0:
        return empty_result()

    return dict(
        bbox=boxes_2d[valid_inds, :].numpy(),
        box3d_camera=boxes_cam[valid_inds].tensor.numpy(),
        box3d_lidar=boxes_lidar[valid_inds].tensor.numpy(),
        scores=scores[valid_inds].numpy(),
        label_preds=labels[valid_inds].numpy(),
        sample_idx=sample_idx)
def _build_default_pipeline(self):
    """Build the default pipeline for this dataset.

    Returns:
        Compose: Point loading, formatting and collection steps, preceded
            by image loading when ``self.modality['use_camera']`` is set.
    """
    load_points = dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        file_client_args=dict(backend='disk'))
    format_bundle = dict(
        type='DefaultFormatBundle3D',
        class_names=self.CLASSES,
        with_label=False)
    collect = dict(type='Collect3D', keys=['points'])

    steps = [load_points, format_bundle, collect]
    if self.modality['use_camera']:
        # Images have to be loaded before anything else.
        steps = [dict(type='LoadImageFromFile')] + steps
    return Compose(steps)
def show(self, results, out_dir, show=True, pipeline=None):
    """Results visualization.

    Args:
        results (list[dict]): List of bounding boxes results.
        out_dir (str): Output directory of visualization result. Must not
            be None.
        show (bool): Whether to visualize the results online.
            Default: True.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.
    """
    assert out_dir is not None, 'Expect out_dir, got none.'
    pipeline = self._get_pipeline(pipeline)
    for sample_pos, result in enumerate(results):
        # Multi-head outputs keep the point cloud boxes under 'pts_bbox'.
        if 'pts_bbox' in result.keys():
            result = result['pts_bbox']

        # The output file is named after the point cloud file, cut at the
        # first dot of its base name.
        pts_path = self.data_infos[sample_pos]['point_cloud'][
            'velodyne_path']
        file_name = osp.basename(pts_path).split('.')[0]

        points, img_metas, img = self._extract_data(
            sample_pos, pipeline, ['points', 'img_metas', 'img'])
        # for now we convert points into depth mode
        points = Coord3DMode.convert_point(points.numpy(),
                                           Coord3DMode.LIDAR,
                                           Coord3DMode.DEPTH)

        gt_bboxes = self.get_ann_info(
            sample_pos)['gt_bboxes_3d'].tensor.numpy()
        pred_bboxes = result['boxes_3d'].tensor.numpy()
        gt_depth = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
                                     Box3DMode.DEPTH)
        pred_depth = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
                                       Box3DMode.DEPTH)
        show_result(points, gt_depth, pred_depth, out_dir, file_name, show)

        # multi-modality visualization
        if not (self.modality['use_camera']
                and 'lidar2img' in img_metas.keys()):
            continue
        # Move the leading axis of the image to the end (presumably
        # channels-first -> channels-last; confirm against the pipeline).
        img = img.numpy().transpose(1, 2, 0)
        pred_lidar = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
        gt_lidar = LiDARInstance3DBoxes(gt_bboxes, origin=(0.5, 0.5, 0))
        show_multi_modality_result(
            img,
            gt_lidar,
            pred_lidar,
            img_metas['lidar2img'],
            out_dir,
            file_name,
            box_mode='lidar',
            show=show)
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/kitti_mono_dataset.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
tempfile
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
torch
from
mmcv.utils
import
print_log
from
..core.bbox
import
Box3DMode
,
CameraInstance3DBoxes
,
points_cam2img
from
.builder
import
DATASETS
from
.nuscenes_mono_dataset
import
NuScenesMonoDataset
@
DATASETS
.
register_module
()
class
KittiMonoDataset
(
NuScenesMonoDataset
):
"""Monocular 3D detection on KITTI Dataset.
Args:
data_root (str): Path of dataset root.
info_file (str): Path of info file.
load_interval (int, optional): Interval of loading the dataset. It is
used to uniformly sample the dataset. Defaults to 1.
with_velocity (bool, optional): Whether include velocity prediction
into the experiments. Defaults to False.
eval_version (str, optional): Configuration version of evaluation.
Defaults to None.
version (str, optional): Dataset version. Defaults to None.
kwargs (dict): Other arguments are the same of NuScenesMonoDataset.
"""
# Category names of this dataset; ``self.CLASSES`` is handed to the KITTI
# result converters as ``class_names`` and to ``kitti_eval``.
CLASSES = ('Pedestrian', 'Cyclist', 'Car')
def __init__(self,
             data_root,
             info_file,
             ann_file,
             pipeline,
             load_interval=1,
             with_velocity=False,
             eval_version=None,
             version=None,
             **kwargs):
    """Initialize the base dataset and load the KITTI info file.

    Args:
        data_root (str): Path of dataset root.
        info_file (str): Path of info file; loaded with ``mmcv.load`` into
            ``self.anno_infos``.
        ann_file (str): Path of annotation file, forwarded to the base
            class.
        pipeline (list[dict]): Forwarded to the base class.
        load_interval (int, optional): Forwarded to the base class.
            Defaults to 1.
        with_velocity (bool, optional): Forwarded to the base class.
            Defaults to False.
        eval_version (str, optional): Forwarded to the base class.
            Defaults to None.
        version (str, optional): Forwarded to the base class.
            Defaults to None.
        **kwargs: Extra keyword arguments for the base class.
    """
    super().__init__(
        data_root=data_root,
        ann_file=ann_file,
        pipeline=pipeline,
        load_interval=load_interval,
        with_velocity=with_velocity,
        eval_version=eval_version,
        version=version,
        **kwargs)
    # Width of an empty 3D box array in ``_parse_ann_info``.
    self.bbox_code_size = 7
    # Per-sample infos used by the result converters and by ``evaluate``.
    self.anno_infos = mmcv.load(info_file)
def _parse_ann_info(self, img_info, ann_info):
    """Parse bbox and mask annotation.

    Annotations are skipped when they are flagged ``ignore``, do not
    overlap the image, have a non-positive area or a side shorter than
    one pixel, or belong to a category outside ``self.cat_ids``.

    Args:
        img_info (dict): Image info; ``width``, ``height`` and
            ``filename`` are read.
        ann_info (list[dict]): Annotation info of an image.

    Returns:
        dict: A dict containing the following keys: bboxes, labels,
            gt_bboxes_3d, gt_labels_3d, centers2d, depths, bboxes_ignore,
            masks, seg_map. "masks" are raw annotations and not
            decoded into binary masks.
    """
    boxes_2d, labels, ignored_boxes, masks = [], [], [], []
    boxes_cam3d, centers, center_depths = [], [], []

    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        # Size of the part of the box that lies inside the image.
        overlap_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
        overlap_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
        if overlap_w * overlap_h == 0:
            continue
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        if ann['category_id'] not in self.cat_ids:
            continue

        corner_box = [x1, y1, x1 + w, y1 + h]
        if ann.get('iscrowd', False):
            # Crowd regions only contribute an ignore box.
            ignored_boxes.append(corner_box)
            continue

        boxes_2d.append(corner_box)
        labels.append(self.cat2label[ann['category_id']])
        masks.append(ann.get('segmentation', None))
        # 3D annotations in camera coordinates
        boxes_cam3d.append(np.array(ann['bbox_cam3d']).reshape(-1, ))
        # 2.5D annotations in camera coordinates
        projected_center = ann['center2d']
        centers.append(projected_center[:2])
        center_depths.append(projected_center[2])

    if boxes_2d:
        gt_bboxes = np.array(boxes_2d, dtype=np.float32)
        gt_labels = np.array(labels, dtype=np.int64)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)

    if boxes_cam3d:
        cam3d_array = np.array(boxes_cam3d, dtype=np.float32)
        centers2d = np.array(centers, dtype=np.float32)
        depths = np.array(center_depths, dtype=np.float32)
    else:
        cam3d_array = np.zeros((0, self.bbox_code_size), dtype=np.float32)
        centers2d = np.zeros((0, 2), dtype=np.float32)
        depths = np.zeros((0), dtype=np.float32)

    gt_bboxes_cam3d = CameraInstance3DBoxes(
        cam3d_array,
        box_dim=cam3d_array.shape[-1],
        origin=(0.5, 0.5, 0.5))

    if ignored_boxes:
        gt_bboxes_ignore = np.array(ignored_boxes, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

    return dict(
        bboxes=gt_bboxes,
        labels=gt_labels,
        gt_bboxes_3d=gt_bboxes_cam3d,
        gt_labels_3d=copy.deepcopy(gt_labels),
        centers2d=centers2d,
        depths=depths,
        bboxes_ignore=gt_bboxes_ignore,
        masks=masks,
        seg_map=img_info['filename'].replace('jpg', 'png'))
def format_results(self,
                   outputs,
                   pklfile_prefix=None,
                   submission_prefix=None):
    """Format the results to pkl file.

    Args:
        outputs (list[dict]): Testing results of the dataset.
        pklfile_prefix (str): The prefix of pkl files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temporary directory is created and
            ``<tmp>/results`` is used. Default: None.
        submission_prefix (str): The prefix of submitted files. It
            includes the file path and the prefix of filename, e.g.,
            "a/b/prefix". It is handed to the converters unchanged
            (None is passed through). Default: None.

    Returns:
        tuple: (result_files, tmp_dir). ``result_files`` is the converter
            output, or a dict ``{name: converter output}`` when the
            outputs carry 'pts_bbox' / 'img_bbox' / 'img_bbox2d' entries.
            ``tmp_dir`` is the :obj:`tempfile.TemporaryDirectory` created
            when ``pklfile_prefix`` is not specified, otherwise None.
    """
    tmp_dir = None
    if pklfile_prefix is None:
        tmp_dir = tempfile.TemporaryDirectory()
        pklfile_prefix = osp.join(tmp_dir.name, 'results')

    first = outputs[0]
    if not isinstance(first, dict):
        # Plain (non-dict) outputs are treated as 2D detections.
        return self.bbox2result_kitti2d(outputs, self.CLASSES,
                                        pklfile_prefix,
                                        submission_prefix), tmp_dir

    multi_head = any(
        key in first for key in ('pts_bbox', 'img_bbox', 'img_bbox2d'))
    if not multi_head:
        return self.bbox2result_kitti(outputs, self.CLASSES,
                                      pklfile_prefix,
                                      submission_prefix), tmp_dir

    # One converted result per output head, keyed by the head name.
    result_files = dict()
    for name in first:
        head_outputs = [out[name] for out in outputs]
        head_submission = (None if submission_prefix is None else
                           submission_prefix + name)
        # Heads with '2d' in their name hold 2D detections.
        convert = (
            self.bbox2result_kitti2d
            if '2d' in name else self.bbox2result_kitti)
        result_files[name] = convert(head_outputs, self.CLASSES,
                                     pklfile_prefix + name, head_submission)
    return result_files, tmp_dir
def evaluate(self,
             results,
             metric=None,
             logger=None,
             pklfile_prefix=None,
             submission_prefix=None,
             show=False,
             out_dir=None,
             pipeline=None):
    """Evaluation in KITTI protocol.

    Args:
        results (list[dict]): Testing results of the dataset.
        metric (str | list[str], optional): Metrics to be evaluated.
            Only the value 'img_bbox2d' is inspected here; it restricts
            the evaluation of non-dict result files to 'bbox'.
            Defaults to None.
        logger (logging.Logger | str, optional): Logger used for printing
            related information during evaluation. Default: None.
        pklfile_prefix (str, optional): The prefix of pkl files, including
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        submission_prefix (str, optional): The prefix of submission data.
            If not specified, the submission data will not be generated.
            Default: None.
        show (bool, optional): Whether to visualize.
            Default: False.
        out_dir (str, optional): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict[str, float]: Results of each evaluation metric.
    """
    # Forward ``submission_prefix`` as well; it used to be dropped here,
    # so the documented submission files were never written.
    result_files, tmp_dir = self.format_results(results, pklfile_prefix,
                                                submission_prefix)
    try:
        from mmdet3d.core.evaluation import kitti_eval
        gt_annos = [info['annos'] for info in self.anno_infos]

        if isinstance(result_files, dict):
            # One evaluation per output head; keys become '<head>/<type>'.
            ap_dict = dict()
            for name, result_files_ in result_files.items():
                eval_types = ['bbox', 'bev', '3d']
                if '2d' in name:
                    eval_types = ['bbox']
                ap_result_str, ap_dict_ = kitti_eval(
                    gt_annos,
                    result_files_,
                    self.CLASSES,
                    eval_types=eval_types)
                for ap_type, ap in ap_dict_.items():
                    ap_dict[f'{name}/{ap_type}'] = float(
                        '{:.4f}'.format(ap))

                print_log(
                    f'Results of {name}:\n' + ap_result_str, logger=logger)
        else:
            if metric == 'img_bbox2d':
                ap_result_str, ap_dict = kitti_eval(
                    gt_annos,
                    result_files,
                    self.CLASSES,
                    eval_types=['bbox'])
            else:
                ap_result_str, ap_dict = kitti_eval(
                    gt_annos, result_files, self.CLASSES)
            print_log('\n' + ap_result_str, logger=logger)
    finally:
        # Remove the temporary result directory even if evaluation fails.
        if tmp_dir is not None:
            tmp_dir.cleanup()

    if show or out_dir:
        self.show(results, out_dir, show=show, pipeline=pipeline)
    return ap_dict
def bbox2result_kitti(self,
                      net_outputs,
                      class_names,
                      pklfile_prefix=None,
                      submission_prefix=None):
    """Convert 3D detection results to kitti format for evaluation and test
    submission.

    Args:
        net_outputs (list): Per-sample network outputs; each entry is
            handed to ``self.convert_valid_bboxes`` together with the
            matching entry of ``self.anno_infos``.
        class_names (list[String]): A list of class names.
        pklfile_prefix (str): The prefix of pkl file. ``.pkl`` is appended
            unless it already ends with ``.pkl`` or ``.pickle``. Nothing
            is dumped when None.
        submission_prefix (str): Directory that receives one
            ``<sample_idx as 6 digits>.txt`` per sample. Nothing is
            written when None.

    Returns:
        list[dict]: A list of dictionaries with the kitti format.
    """
    assert len(net_outputs) == len(self.anno_infos)
    if submission_prefix is not None:
        mmcv.mkdir_or_exist(submission_prefix)

    det_annos = []
    print('\nConverting prediction to KITTI format')
    for idx, pred_dicts in enumerate(
            mmcv.track_iter_progress(net_outputs)):
        annos = []
        info = self.anno_infos[idx]
        sample_idx = info['image']['image_idx']
        image_shape = info['image']['image_shape'][:2]

        box_dict = self.convert_valid_bboxes(pred_dicts, info)
        anno = {
            'name': [],
            'truncated': [],
            'occluded': [],
            'alpha': [],
            'bbox': [],
            'dimensions': [],
            'location': [],
            'rotation_y': [],
            'score': []
        }
        if len(box_dict['bbox']) > 0:
            box_2d_preds = box_dict['bbox']
            box_preds = box_dict['box3d_camera']
            scores = box_dict['scores']
            box_preds_lidar = box_dict['box3d_lidar']
            label_preds = box_dict['label_preds']

            for box, box_lidar, bbox, score, label in zip(
                    box_preds, box_preds_lidar, box_2d_preds, scores,
                    label_preds):
                # Clip the 2D box to the image: image_shape is reversed
                # so that it lines up with the (x, y) box corners.
                bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                bbox[:2] = np.maximum(bbox[:2], [0, 0])
                anno['name'].append(class_names[int(label)])
                anno['truncated'].append(0.0)
                anno['occluded'].append(0)
                # alpha is derived from the camera-frame box here.
                anno['alpha'].append(-np.arctan2(box[0], box[2]) + box[6])
                anno['bbox'].append(bbox)
                anno['dimensions'].append(box[3:6])
                anno['location'].append(box[:3])
                anno['rotation_y'].append(box[6])
                anno['score'].append(score)

            anno = {k: np.stack(v) for k, v in anno.items()}
            annos.append(anno)
        else:
            # No valid box: keep an empty, correctly shaped record.
            anno = {
                'name': np.array([]),
                'truncated': np.array([]),
                'occluded': np.array([]),
                'alpha': np.array([]),
                'bbox': np.zeros([0, 4]),
                'dimensions': np.zeros([0, 3]),
                'location': np.zeros([0, 3]),
                'rotation_y': np.array([]),
                'score': np.array([]),
            }
            annos.append(anno)

        if submission_prefix is not None:
            curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
            with open(curr_file, 'w') as f:
                bbox = anno['bbox']
                loc = anno['location']
                dims = anno['dimensions']  # lhw -> hwl

                # Dedicated loop variable so the sample index ``idx`` of
                # the outer loop is not shadowed.
                for box_idx in range(len(bbox)):
                    print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
                            anno['name'][box_idx], anno['alpha'][box_idx],
                            bbox[box_idx][0], bbox[box_idx][1],
                            bbox[box_idx][2], bbox[box_idx][3],
                            dims[box_idx][1], dims[box_idx][2],
                            dims[box_idx][0], loc[box_idx][0],
                            loc[box_idx][1], loc[box_idx][2],
                            anno['rotation_y'][box_idx],
                            anno['score'][box_idx]),
                        file=f)

        annos[-1]['sample_idx'] = np.array(
            [sample_idx] * len(annos[-1]['score']), dtype=np.int64)

        det_annos += annos

    if pklfile_prefix is not None:
        # ``out`` must be bound on both paths; previously a prefix that
        # already carried a pickle extension left it undefined.
        if pklfile_prefix.endswith(('.pkl', '.pickle')):
            out = pklfile_prefix
        else:
            out = f'{pklfile_prefix}.pkl'
        mmcv.dump(det_annos, out)
        print('Result is saved to %s' % out)

    return det_annos
def bbox2result_kitti2d(self,
                        net_outputs,
                        class_names,
                        pklfile_prefix=None,
                        submission_prefix=None):
    """Convert 2D detection results to kitti format for evaluation and test
    submission.

    Args:
        net_outputs (list[list[np.ndarray]]): One entry per sample. Each
            entry is indexed by class label and holds a 2D array whose
            first four columns are the box and whose fifth column is the
            score.
        class_names (list[String]): A list of class names, indexed by
            label.
        pklfile_prefix (str): The prefix of pkl file. A path that already
            ends with ``.pkl`` / ``.pickle`` is used unchanged, otherwise
            ``.pkl`` is appended.
        submission_prefix (str): Directory that receives one
            ``<image_idx>.txt`` file per sample.

    Returns:
        list[dict]: A list of dictionaries have the kitti format
    """
    assert len(net_outputs) == len(self.anno_infos)

    det_annos = []
    print('\nConverting prediction to KITTI format')
    for sample_pos, bboxes_per_sample in enumerate(
            mmcv.track_iter_progress(net_outputs)):
        sample_idx = self.anno_infos[sample_pos]['image']['image_idx']
        anno = dict(
            name=[],
            truncated=[],
            occluded=[],
            alpha=[],
            bbox=[],
            dimensions=[],
            location=[],
            rotation_y=[],
            score=[])

        num_example = 0
        for label, bbox in enumerate(bboxes_per_sample):
            # A dedicated row index keeps the sample index intact (the
            # original reused ``i`` for both loops).
            for row in range(bbox.shape[0]):
                anno['name'].append(class_names[int(label)])
                anno['truncated'].append(0.0)
                anno['occluded'].append(0)
                anno['alpha'].append(-10)
                anno['bbox'].append(bbox[row, :4])
                # set dimensions (height, width, length) to zero
                anno['dimensions'].append(
                    np.zeros(shape=[3], dtype=np.float32))
                # set the 3D translation to (-1000, -1000, -1000)
                anno['location'].append(
                    np.ones(shape=[3], dtype=np.float32) * (-1000.0))
                anno['rotation_y'].append(0.0)
                anno['score'].append(bbox[row, 4])
                num_example += 1

        if num_example == 0:
            # np.stack cannot handle empty lists, so build the empty
            # arrays explicitly.
            anno = dict(
                name=np.array([]),
                truncated=np.array([]),
                occluded=np.array([]),
                alpha=np.array([]),
                bbox=np.zeros([0, 4]),
                dimensions=np.zeros([0, 3]),
                location=np.zeros([0, 3]),
                rotation_y=np.array([]),
                score=np.array([]),
            )
        else:
            anno = {k: np.stack(v) for k, v in anno.items()}

        anno['sample_idx'] = np.array(
            [sample_idx] * num_example, dtype=np.int64)
        det_annos.append(anno)

    if pklfile_prefix is not None:
        # Always bind ``out``: previously a prefix that already carried a
        # pickle suffix left it undefined and mmcv.dump raised NameError.
        if pklfile_prefix.endswith(('.pkl', '.pickle')):
            out = pklfile_prefix
        else:
            out = f'{pklfile_prefix}.pkl'
        mmcv.dump(det_annos, out)
        print('Result is saved to %s' % out)

    if submission_prefix is not None:
        # save file in submission format
        mmcv.mkdir_or_exist(submission_prefix)
        print(f'Saving KITTI submission to {submission_prefix}')
        for sample_pos, anno in enumerate(det_annos):
            sample_idx = self.anno_infos[sample_pos]['image']['image_idx']
            cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
            with open(cur_det_file, 'w') as f:
                bbox = anno['bbox']
                loc = anno['location']
                # lhw -> hwl: reverse the columns of every row. The
                # original ``[::-1]`` reversed the order of the boxes.
                dims = anno['dimensions'][:, ::-1]
                for idx in range(len(bbox)):
                    # ``.4f`` (precision) matches bbox2result_kitti; the
                    # original ``4f`` only set a minimum field width.
                    print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f}'.format(
                            anno['name'][idx],
                            anno['alpha'][idx],
                            *bbox[idx],  # 4 float
                            *dims[idx],  # 3 float
                            *loc[idx],  # 3 float
                            anno['rotation_y'][idx],
                            anno['score'][idx]),
                        file=f,
                    )
        print(f'Result is saved to {submission_prefix}')

    return det_annos
def convert_valid_bboxes(self, box_dict, info):
    """Convert the predicted boxes into valid ones.

    A box is kept when its projected 2D box starts before the right and
    bottom image borders and ends at positive coordinates. Every return
    path yields the same set of keys.

    Args:
        box_dict (dict): Box dictionaries to be converted.
            - boxes_3d (:obj:`CameraInstance3DBoxes`): 3D bounding boxes.
            - scores_3d (torch.Tensor): Scores of boxes.
            - labels_3d (torch.Tensor): Class labels of boxes.
        info (dict): Data info. ``info['image']`` and ``info['calib']``
            (``R0_rect``, ``Tr_velo_to_cam``, ``P2``) are read.

    Returns:
        dict: Valid predicted boxes.
            - bbox (np.ndarray): 2D bounding boxes.
            - box3d_camera (np.ndarray): 3D bounding boxes in
                camera coordinate.
            - box3d_lidar (np.ndarray): 3D bounding boxes in
                LiDAR coordinate.
            - scores (np.ndarray): Scores of boxes.
            - label_preds (np.ndarray): Class label predictions.
            - sample_idx (int): Sample index.
    """
    box_preds = box_dict['boxes_3d']
    scores = box_dict['scores_3d']
    labels = box_dict['labels_3d']
    sample_idx = info['image']['image_idx']

    # Shared by both "nothing to report" exits so they cannot drift apart;
    # the early exit used to lack the ``box3d_lidar`` key.
    empty_result = dict(
        bbox=np.zeros([0, 4]),
        box3d_camera=np.zeros([0, 7]),
        box3d_lidar=np.zeros([0, 7]),
        scores=np.zeros([0]),
        label_preds=np.zeros([0, 4]),
        sample_idx=sample_idx)

    if len(box_preds) == 0:
        return empty_result

    rect = info['calib']['R0_rect'].astype(np.float32)
    Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
    P2 = info['calib']['P2'].astype(np.float32)
    img_shape = info['image']['image_shape']
    P2 = box_preds.tensor.new_tensor(P2)

    box_preds_camera = box_preds
    box_preds_lidar = box_preds.convert_to(Box3DMode.LIDAR,
                                           np.linalg.inv(rect @ Trv2c))

    box_corners = box_preds_camera.corners
    box_corners_in_image = points_cam2img(box_corners, P2)
    # box_corners_in_image: [N, 8, 2]
    minxy = torch.min(box_corners_in_image, dim=1)[0]
    maxxy = torch.max(box_corners_in_image, dim=1)[0]
    box_2d_preds = torch.cat([minxy, maxxy], dim=1)
    # Post-processing
    # check box_preds_camera
    image_shape = box_preds.tensor.new_tensor(img_shape)
    valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &
                      (box_2d_preds[:, 1] < image_shape[0]) &
                      (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
    # check box_preds
    valid_inds = valid_cam_inds

    if valid_inds.sum() > 0:
        return dict(
            bbox=box_2d_preds[valid_inds, :].numpy(),
            box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
            box3d_lidar=box_preds_lidar[valid_inds].tensor.numpy(),
            scores=scores[valid_inds].numpy(),
            label_preds=labels[valid_inds].numpy(),
            sample_idx=sample_idx)
    return empty_result
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/lyft_dataset.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
os
import
tempfile
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
pandas
as
pd
from
lyft_dataset_sdk.lyftdataset
import
LyftDataset
as
Lyft
from
lyft_dataset_sdk.utils.data_classes
import
Box
as
LyftBox
from
pyquaternion
import
Quaternion
from
mmdet3d.core.evaluation.lyft_eval
import
lyft_eval
from
..core
import
show_result
from
..core.bbox
import
Box3DMode
,
Coord3DMode
,
LiDARInstance3DBoxes
from
.builder
import
DATASETS
from
.custom_3d
import
Custom3DDataset
from
.pipelines
import
Compose
@
DATASETS
.
register_module
()
class
LyftDataset
(
Custom3DDataset
):
r
"""Lyft Dataset.
This class serves as the API for experiments on the Lyft Dataset.
Please refer to
`<https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/data>`_
for data downloading.
Args:
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
data_root (str): Path of dataset root.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
load_interval (int, optional): Interval of loading the dataset. It is
used to uniformly sample the dataset. Defaults to 1.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
# noqa: E501
NameMapping
=
{
'bicycle'
:
'bicycle'
,
'bus'
:
'bus'
,
'car'
:
'car'
,
'emergency_vehicle'
:
'emergency_vehicle'
,
'motorcycle'
:
'motorcycle'
,
'other_vehicle'
:
'other_vehicle'
,
'pedestrian'
:
'pedestrian'
,
'truck'
:
'truck'
,
'animal'
:
'animal'
}
DefaultAttribute
=
{
'car'
:
'is_stationary'
,
'truck'
:
'is_stationary'
,
'bus'
:
'is_stationary'
,
'emergency_vehicle'
:
'is_stationary'
,
'other_vehicle'
:
'is_stationary'
,
'motorcycle'
:
'is_stationary'
,
'bicycle'
:
'is_stationary'
,
'pedestrian'
:
'is_stationary'
,
'animal'
:
'is_stationary'
}
CLASSES
=
(
'car'
,
'truck'
,
'bus'
,
'emergency_vehicle'
,
'other_vehicle'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'animal'
)
def __init__(self,
             ann_file,
             pipeline=None,
             data_root=None,
             classes=None,
             load_interval=1,
             modality=None,
             box_type_3d='LiDAR',
             filter_empty_gt=True,
             test_mode=False,
             **kwargs):
    """Initialize the dataset.

    ``load_interval`` is stored before the parent initializer runs, the
    remaining arguments (and any extra ``kwargs``) are forwarded to the
    parent, and a LiDAR-only modality is installed when none was given.
    """
    self.load_interval = load_interval

    parent_options = {
        'data_root': data_root,
        'ann_file': ann_file,
        'pipeline': pipeline,
        'classes': classes,
        'modality': modality,
        'box_type_3d': box_type_3d,
        'filter_empty_gt': filter_empty_gt,
        'test_mode': test_mode,
    }
    super().__init__(**parent_options, **kwargs)

    # Fall back to a LiDAR-only setup when no modality was configured.
    if self.modality is None:
        self.modality = {
            'use_camera': False,
            'use_lidar': True,
            'use_radar': False,
            'use_map': False,
            'use_external': False,
        }
def load_annotations(self, ann_file):
    """Load annotations from ann_file.

    Also stores ``metadata`` and ``version`` from the loaded file on the
    instance.

    Args:
        ann_file (str): Path of the annotation file.

    Returns:
        list[dict]: Annotations sorted by timestamp, keeping every
            ``load_interval``-th entry.
    """
    # loading data from a file-like object needs file format
    loaded = mmcv.load(ann_file, file_format='pkl')
    by_time = sorted(loaded['infos'], key=lambda item: item['timestamp'])
    subsampled = by_time[::self.load_interval]
    self.metadata = loaded['metadata']
    self.version = self.metadata['version']
    return subsampled
def get_data_info(self, index):
    """Get data info according to the given index.

    Args:
        index (int): Index of the sample data to get.

    Returns:
        dict: Data information that will be passed to the data
            preprocessing pipelines. It includes the following keys:

            - sample_idx (str): sample index
            - pts_filename (str): filename of point clouds
            - sweeps (list[dict]): infos of sweeps
            - timestamp (float): sample timestamp (stored value / 1e6)
            - img_filename (list[str], optional): image filenames,
                present when the camera modality is enabled
            - lidar2img (list[np.ndarray], optional): transformations
                from lidar to different cameras
            - ann_info (dict): annotation info, present when not in
                test mode
    """
    record = self.data_infos[index]
    # standard protocol modified from SECOND.Pytorch
    sample = {
        'sample_idx': record['token'],
        'pts_filename': record['lidar_path'],
        'sweeps': record['sweeps'],
        'timestamp': record['timestamp'] / 1e6,
    }

    if self.modality['use_camera']:
        img_files = []
        projections = []
        for camera in record['cams'].values():
            img_files.append(camera['data_path'])
            # obtain lidar to image transformation matrix
            rot_inv = np.linalg.inv(camera['sensor2lidar_rotation'])
            shift = camera['sensor2lidar_translation'] @ rot_inv.T
            extrinsic_t = np.eye(4)
            extrinsic_t[:3, :3] = rot_inv.T
            extrinsic_t[3, :3] = -shift
            cam_k = camera['cam_intrinsic']
            # Embed the intrinsic matrix in the top-left of a 4x4 identity.
            padded_k = np.eye(4)
            padded_k[:cam_k.shape[0], :cam_k.shape[1]] = cam_k
            projections.append(padded_k @ extrinsic_t.T)

        sample['img_filename'] = img_files
        sample['lidar2img'] = projections

    if not self.test_mode:
        sample['ann_info'] = self.get_ann_info(index)

    return sample
def get_ann_info(self, index):
    """Get annotation info according to the given index.

    Args:
        index (int): Index of the annotation data to get.

    Returns:
        dict: Annotation information consists of the following keys:

            - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
                3D ground truth bboxes, converted to ``self.box_mode_3d``.
            - gt_labels_3d (np.ndarray): Indices into ``self.CLASSES``;
                -1 for names that are not in ``self.CLASSES``.
    """
    record = self.data_infos[index]
    boxes = record['gt_boxes']
    names = record['gt_names']

    labels = np.array([
        self.CLASSES.index(name) if name in self.CLASSES else -1
        for name in names
    ])

    if 'gt_shape' in record:
        # Append the extra shape columns to each box.
        boxes = np.concatenate([boxes, record['gt_shape']], axis=-1)

    # the lyft box center is [0.5, 0.5, 0.5], we change it to be
    # the same as KITTI (0.5, 0.5, 0)
    lidar_boxes = LiDARInstance3DBoxes(
        boxes, box_dim=boxes.shape[-1], origin=(0.5, 0.5, 0.5))
    converted = lidar_boxes.convert_to(self.box_mode_3d)

    return dict(gt_bboxes_3d=converted, gt_labels_3d=labels)
def _format_bbox(self, results, jsonfile_prefix=None):
    """Convert the results to the standard format.

    Args:
        results (list[dict]): Testing results of the dataset.
        jsonfile_prefix (str): Directory that receives
            ``results_lyft.json``; it is created if missing.
            Default: None.

    Returns:
        str: Path of the output json file.
    """
    class_names = self.CLASSES
    per_sample = {}

    print('Start to convert detection format...')
    for position, detection in enumerate(mmcv.track_iter_progress(results)):
        sample_info = self.data_infos[position]
        lidar_boxes = output_to_lyft_box(detection)
        token = sample_info['token']
        global_boxes = lidar_lyft_box_to_global(sample_info, lidar_boxes)
        per_sample[token] = [
            dict(
                sample_token=token,
                translation=item.center.tolist(),
                size=item.wlh.tolist(),
                rotation=item.orientation.elements.tolist(),
                name=class_names[item.label],
                score=item.score) for item in global_boxes
        ]

    submission = {'meta': self.modality, 'results': per_sample}

    mmcv.mkdir_or_exist(jsonfile_prefix)
    res_path = osp.join(jsonfile_prefix, 'results_lyft.json')
    print('Results writes to', res_path)
    mmcv.dump(submission, res_path)
    return res_path
def _evaluate_single(self,
                     result_path,
                     logger=None,
                     metric='bbox',
                     result_name='pts_bbox'):
    """Evaluation for a single model in Lyft protocol.

    Args:
        result_path (str): Path of the result file.
        logger (logging.Logger | str, optional): Logger passed on to
            ``lyft_eval``. Default: None.
        metric (str, optional): Metric name used for evaluation.
            Not read by this method. Default: 'bbox'.
        result_name (str, optional): Result name in the metric prefix.
            Default: 'pts_bbox'.

    Returns:
        dict: Per-class AP entries plus the overall mAP, keyed by
            ``'<result_name>_Lyft/...'``.
    """
    # Evaluation artifacts go next to the result file.
    output_dir = osp.dirname(result_path)
    version_root = osp.join(self.data_root, self.version)
    lyft = Lyft(
        data_path=version_root,
        json_path=osp.join(self.data_root, self.version, self.version),
        verbose=True)
    # Only this dataset version has an evaluation split mapped.
    eval_set_map = {'v1.01-train': 'val'}
    metrics = lyft_eval(lyft, self.data_root, result_path,
                        eval_set_map[self.version], output_dir, logger)

    # record metrics
    prefix = f'{result_name}_Lyft'
    report = {}
    for position, class_name in enumerate(metrics['class_names']):
        report[f'{prefix}/{class_name}_AP'] = float(
            metrics['mAPs_cate'][position])
    report[f'{prefix}/mAP'] = metrics['Final mAP']
    return report
def format_results(self, results, jsonfile_prefix=None, csv_savepath=None):
    """Format the results to json files in the Lyft result layout.

    Args:
        results (list[dict]): Testing results of the dataset.
        jsonfile_prefix (str): The prefix of json files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        csv_savepath (str): The path for saving csv files.
            It includes the file path and the csv filename,
            e.g., "a/b/filename.csv". If not specified,
            the result will not be converted to csv file.

    Returns:
        tuple: Returns (result_files, tmp_dir). `result_files` is the
            json filepath (str) for flat results, or a dict mapping each
            result name to its json filepath for nested results.
            `tmp_dir` is the temporal directory created for saving json
            files when `jsonfile_prefix` is not specified, else None.

    Raises:
        KeyError: If `csv_savepath` is given for nested results that
            contain no 'pts_bbox' entry.
    """
    assert isinstance(results, list), 'results must be a list'
    assert len(results) == len(self), (
        'The length of results is not equal to the dataset len: {} != {}'.
        format(len(results), len(self)))

    if jsonfile_prefix is None:
        tmp_dir = tempfile.TemporaryDirectory()
        jsonfile_prefix = osp.join(tmp_dir.name, 'results')
    else:
        tmp_dir = None

    # currently the output prediction results could be in two formats
    # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
    # 2. list of dict('pts_bbox' or 'img_bbox':
    #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
    # this is a workaround to enable evaluation of both formats on Lyft
    # refer to https://github.com/open-mmlab/mmdetection3d/issues/449
    if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):
        result_files = self._format_bbox(results, jsonfile_prefix)
    else:
        # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict
        result_files = dict()
        for name in results[0]:
            print(f'\nFormating bboxes of {name}')
            results_ = [out[name] for out in results]
            tmp_file_ = osp.join(jsonfile_prefix, name)
            result_files.update(
                {name: self._format_bbox(results_, tmp_file_)})

    if csv_savepath is not None:
        # Flat results yield a single path; indexing that string with
        # 'pts_bbox' used to raise TypeError.
        if isinstance(result_files, dict):
            json_path = result_files['pts_bbox']
        else:
            json_path = result_files
        self.json2csv(json_path, csv_savepath)
    return result_files, tmp_dir
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             csv_savepath=None,
             result_names=['pts_bbox'],
             show=False,
             out_dir=None,
             pipeline=None):
    """Evaluation in Lyft protocol.

    Args:
        results (list[dict]): Testing results of the dataset.
        metric (str | list[str], optional): Metrics to be evaluated.
            Default: 'bbox'.
        logger (logging.Logger | str, optional): Logger used for printing
            related information during evaluation. Default: None.
        jsonfile_prefix (str, optional): The prefix of json files including
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        csv_savepath (str, optional): The path for saving csv files.
            It includes the file path and the csv filename,
            e.g., "a/b/filename.csv". If not specified,
            the result will not be converted to csv file.
        result_names (list[str], optional): Result names in the
            metric prefix. Default: ['pts_bbox']. The default list is
            only read, never modified.
        show (bool, optional): Whether to visualize.
            Default: False.
        out_dir (str, optional): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict[str, float]: Evaluation results.

    Raises:
        TypeError: If ``format_results`` returns something other than a
            dict or a str.
    """
    result_files, tmp_dir = self.format_results(results, jsonfile_prefix,
                                                csv_savepath)

    try:
        if isinstance(result_files, dict):
            results_dict = dict()
            for name in result_names:
                print(f'Evaluating bboxes of {name}')
                # Merge every result (not just the last one) and forward
                # the caller's logger/metric; passing the name keeps the
                # metric keys of different results from colliding.
                results_dict.update(
                    self._evaluate_single(
                        result_files[name],
                        logger=logger,
                        metric=metric,
                        result_name=name))
        elif isinstance(result_files, str):
            results_dict = self._evaluate_single(
                result_files, logger=logger, metric=metric)
        else:
            raise TypeError(
                'format_results must return a dict or a str, got '
                f'{type(result_files).__name__}')
    finally:
        # Remove the temporary json directory even if evaluation fails.
        if tmp_dir is not None:
            tmp_dir.cleanup()

    if show or out_dir:
        self.show(results, out_dir, show=show, pipeline=pipeline)
    return results_dict
def _build_default_pipeline(self):
    """Build the default pipeline for this dataset.

    The pipeline loads the point file, loads multi-sweep points, applies
    the default 3D format bundle without labels and collects ``points``.
    """
    load_points = {
        'type': 'LoadPointsFromFile',
        'coord_type': 'LIDAR',
        'load_dim': 5,
        'use_dim': 5,
        'file_client_args': {'backend': 'disk'},
    }
    load_sweeps = {
        'type': 'LoadPointsFromMultiSweeps',
        'sweeps_num': 10,
        'file_client_args': {'backend': 'disk'},
    }
    format_bundle = {
        'type': 'DefaultFormatBundle3D',
        'class_names': self.CLASSES,
        'with_label': False,
    }
    collect = {'type': 'Collect3D', 'keys': ['points']}
    return Compose([load_points, load_sweeps, format_bundle, collect])
def show(self, results, out_dir, show=False, pipeline=None):
    """Results visualization.

    Args:
        results (list[dict]): List of bounding boxes results. An entry
            holding a 'pts_bbox' key is unwrapped to that inner dict.
        out_dir (str): Output directory of visualization result.
        show (bool): Whether to visualize the results online.
            Default: False.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.
    """
    assert out_dir is not None, 'Expect out_dir, got none.'
    pipeline = self._get_pipeline(pipeline)
    for position, detection in enumerate(results):
        if 'pts_bbox' in detection.keys():
            detection = detection['pts_bbox']
        lidar_file = self.data_infos[position]['lidar_path']
        # File name up to its first dot, without the directory part.
        stem = osp.basename(lidar_file).split('.')[0]
        cloud = self._extract_data(position, pipeline, 'points').numpy()
        cloud = Coord3DMode.convert_point(cloud, Coord3DMode.LIDAR,
                                          Coord3DMode.DEPTH)
        # Only predictions scoring above 0.1 are drawn.
        confident = detection['scores_3d'] > 0.1
        gt_lidar = self.get_ann_info(position)['gt_bboxes_3d'].tensor.numpy()
        gt_depth = Box3DMode.convert(gt_lidar, Box3DMode.LIDAR,
                                     Box3DMode.DEPTH)
        pred_lidar = detection['boxes_3d'][confident].tensor.numpy()
        pred_depth = Box3DMode.convert(pred_lidar, Box3DMode.LIDAR,
                                       Box3DMode.DEPTH)
        show_result(cloud, gt_depth, pred_depth, out_dir, stem, show)
def json2csv(self, json_path, csv_savepath):
    """Convert the json file to csv format for submission.

    The ``Id`` column of ``<data_root>/sample_submission.csv`` fixes the
    row order; each sample token found in the json overwrites the
    ``PredictionString`` of its row.

    Args:
        json_path (str): Path of the result json file.
        csv_savepath (str): Path to save the csv file.

    Raises:
        ValueError: If a sample token from the json file is not listed in
            the sample submission file.
    """
    results = mmcv.load(json_path)['results']
    sample_list_path = osp.join(self.data_root, 'sample_submission.csv')
    data = pd.read_csv(sample_list_path)
    Id_list = list(data['Id'])
    pred_list = list(data['PredictionString'])

    # Map each id to its first row once, replacing a linear
    # ``list.index`` scan per token.
    row_of = {}
    for row, sample_id in enumerate(Id_list):
        row_of.setdefault(sample_id, row)

    print('Converting the json to csv...')
    for token, predictions in results.items():
        entries = []
        for pred in predictions:
            center = pred['translation']
            size = pred['size']
            yaw = Quaternion(list(pred['rotation'])).yaw_pitch_roll[0]
            entries.append(' '.join([
                str(pred['score']),
                str(center[0]),
                str(center[1]),
                str(center[2]),
                str(size[0]),
                str(size[1]),
                str(size[2]),
                str(yaw),
                pred['name'],
            ]))
        if token not in row_of:
            # Same exception type ``list.index`` raised before.
            raise ValueError(
                f'sample token {token!r} is not listed in '
                f'{sample_list_path}')
        pred_list[row_of[token]] = ' '.join(entries)

    df = pd.DataFrame({'Id': Id_list, 'PredictionString': pred_list})
    mmcv.mkdir_or_exist(os.path.dirname(csv_savepath))
    df.to_csv(csv_savepath, index=False)
def output_to_lyft_box(detection):
    """Convert the output to the box class in the Lyft.

    Args:
        detection (dict): Detection results with the keys 'boxes_3d',
            'scores_3d' and 'labels_3d'.

    Returns:
        list[:obj:`LyftBox`]: List of standard LyftBoxes.
    """
    boxes = detection['boxes_3d']
    score_arr = detection['scores_3d'].numpy()
    label_arr = detection['labels_3d'].numpy()

    centers = boxes.gravity_center.numpy()
    dims = boxes.dims.numpy()
    yaws = boxes.yaw.numpy()

    # our LiDAR coordinate system -> Lyft box coordinate system
    # (the first two size columns are swapped)
    lyft_dims = dims[:, [1, 0, 2]]

    return [
        LyftBox(
            centers[k],
            lyft_dims[k],
            # Rotation about the z axis by the box yaw.
            Quaternion(axis=[0, 0, 1], radians=yaws[k]),
            label=label_arr[k],
            score=score_arr[k]) for k in range(len(boxes))
    ]
def lidar_lyft_box_to_global(info, boxes):
    """Convert the boxes from LiDAR to global coordinate, via the ego frame.

    The boxes are modified in place and also returned.

    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
        boxes (list[:obj:`LyftBox`]): List of predicted LyftBoxes.

    Returns:
        list: List of standard LyftBoxes in the global
            coordinate.
    """
    # Applied in order: LiDAR -> ego vehicle, then ego vehicle -> global.
    stages = (
        ('lidar2ego_rotation', 'lidar2ego_translation'),
        ('ego2global_rotation', 'ego2global_translation'),
    )
    moved = []
    for item in boxes:
        for rotation_key, translation_key in stages:
            item.rotate(Quaternion(info[rotation_key]))
            item.translate(np.array(info[translation_key]))
        moved.append(item)
    return moved
docker-hub/FlashOCC/Flashocc/mmdetection3d/mmdet3d/datasets/nuscenes_dataset.py
0 → 100644
View file @
ba3cd005
# Copyright (c) OpenMMLab. All rights reserved.
import
tempfile
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
pyquaternion
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
..core
import
show_result
from
..core.bbox
import
Box3DMode
,
Coord3DMode
,
LiDARInstance3DBoxes
from
.builder
import
DATASETS
from
.custom_3d
import
Custom3DDataset
from
.pipelines
import
Compose
@
DATASETS
.
register_module
()
class
NuScenesDataset
(
Custom3DDataset
):
r
"""NuScenes Dataset.
This class serves as the API for experiments on the NuScenes Dataset.
Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_
for data downloading.
Args:
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
data_root (str): Path of dataset root.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
load_interval (int, optional): Interval of loading the dataset. It is
used to uniformly sample the dataset. Defaults to 1.
with_velocity (bool, optional): Whether include velocity prediction
into the experiments. Defaults to True.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes.
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
eval_version (bool, optional): Configuration version of evaluation.
Defaults to 'detection_cvpr_2019'.
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
"""
NameMapping
=
{
'movable_object.barrier'
:
'barrier'
,
'vehicle.bicycle'
:
'bicycle'
,
'vehicle.bus.bendy'
:
'bus'
,
'vehicle.bus.rigid'
:
'bus'
,
'vehicle.car'
:
'car'
,
'vehicle.construction'
:
'construction_vehicle'
,
'vehicle.motorcycle'
:
'motorcycle'
,
'human.pedestrian.adult'
:
'pedestrian'
,
'human.pedestrian.child'
:
'pedestrian'
,
'human.pedestrian.construction_worker'
:
'pedestrian'
,
'human.pedestrian.police_officer'
:
'pedestrian'
,
'movable_object.trafficcone'
:
'traffic_cone'
,
'vehicle.trailer'
:
'trailer'
,
'vehicle.truck'
:
'truck'
}
DefaultAttribute
=
{
'car'
:
'vehicle.parked'
,
'pedestrian'
:
'pedestrian.moving'
,
'trailer'
:
'vehicle.parked'
,
'truck'
:
'vehicle.parked'
,
'bus'
:
'vehicle.moving'
,
'motorcycle'
:
'cycle.without_rider'
,
'construction_vehicle'
:
'vehicle.parked'
,
'bicycle'
:
'cycle.without_rider'
,
'barrier'
:
''
,
'traffic_cone'
:
''
,
}
AttrMapping
=
{
'cycle.with_rider'
:
0
,
'cycle.without_rider'
:
1
,
'pedestrian.moving'
:
2
,
'pedestrian.standing'
:
3
,
'pedestrian.sitting_lying_down'
:
4
,
'vehicle.moving'
:
5
,
'vehicle.parked'
:
6
,
'vehicle.stopped'
:
7
,
}
AttrMapping_rev
=
[
'cycle.with_rider'
,
'cycle.without_rider'
,
'pedestrian.moving'
,
'pedestrian.standing'
,
'pedestrian.sitting_lying_down'
,
'vehicle.moving'
,
'vehicle.parked'
,
'vehicle.stopped'
,
]
# https://github.com/nutonomy/nuscenes-devkit/blob/57889ff20678577025326cfc24e57424a829be0a/python-sdk/nuscenes/eval/detection/evaluate.py#L222 # noqa
ErrNameMapping
=
{
'trans_err'
:
'mATE'
,
'scale_err'
:
'mASE'
,
'orient_err'
:
'mAOE'
,
'vel_err'
:
'mAVE'
,
'attr_err'
:
'mAAE'
}
CLASSES
=
(
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
)
def __init__(self,
             ann_file,
             pipeline=None,
             data_root=None,
             classes=None,
             load_interval=1,
             with_velocity=True,
             modality=None,
             box_type_3d='LiDAR',
             filter_empty_gt=True,
             test_mode=False,
             eval_version='detection_cvpr_2019',
             use_valid_flag=False):
    """Initialize the dataset.

    ``load_interval`` and ``use_valid_flag`` are stored before the parent
    initializer runs. Afterwards the velocity flag and evaluation config
    are set up, and a LiDAR-only modality is installed when none was
    given.
    """
    self.load_interval = load_interval
    self.use_valid_flag = use_valid_flag

    parent_options = {
        'data_root': data_root,
        'ann_file': ann_file,
        'pipeline': pipeline,
        'classes': classes,
        'modality': modality,
        'box_type_3d': box_type_3d,
        'filter_empty_gt': filter_empty_gt,
        'test_mode': test_mode,
    }
    super().__init__(**parent_options)

    self.with_velocity = with_velocity
    self.eval_version = eval_version
    # Imported here, as in the original, so the nuscenes evaluation
    # package is only needed once a dataset is constructed.
    from nuscenes.eval.detection.config import config_factory
    self.eval_detection_configs = config_factory(self.eval_version)

    # Fall back to a LiDAR-only setup when no modality was configured.
    if self.modality is None:
        self.modality = {
            'use_camera': False,
            'use_lidar': True,
            'use_radar': False,
            'use_map': False,
            'use_external': False,
        }
def get_cat_ids(self, idx):
    """Get the category ids present in a single scene.

    Args:
        idx (int): Index of the data_info.

    Returns:
        list[int]: One id (looked up in ``self.cat2id``) per distinct
            ground-truth name that is in ``self.CLASSES``. With
            ``use_valid_flag`` set, only names selected by the info's
            'valid_flag' mask are considered.
    """
    record = self.data_infos[idx]
    if self.use_valid_flag:
        keep = record['valid_flag']
        present = set(record['gt_names'][keep])
    else:
        present = set(record['gt_names'])

    return [
        self.cat2id[label] for label in present if label in self.CLASSES
    ]
def load_annotations(self, ann_file):
    """Load annotations from ann_file.

    Also stores ``metadata`` and ``version`` from the loaded file on the
    instance.

    Args:
        ann_file (str): Path of the annotation file.

    Returns:
        list[dict]: Annotations sorted by timestamp, keeping every
            ``load_interval``-th entry.
    """
    loaded = mmcv.load(ann_file, file_format='pkl')
    by_time = sorted(loaded['infos'], key=lambda item: item['timestamp'])
    subsampled = by_time[::self.load_interval]
    self.metadata = loaded['metadata']
    self.version = self.metadata['version']
    return subsampled
def get_data_info(self, index):
    """Get data info according to the given index.

    Args:
        index (int): Index of the sample data to get.

    Returns:
        dict: Data information that will be passed to the data
            preprocessing pipelines. It includes the following keys:

            - sample_idx (str): Sample index.
            - pts_filename (str): Filename of point clouds.
            - sweeps (list[dict]): Infos of sweeps.
            - timestamp (float): Sample timestamp (stored value / 1e6).
            - img_filename (list[str], optional): Image filenames,
                present when the camera modality is enabled.
            - lidar2img (list[np.ndarray], optional): Transformations
                from lidar to different cameras.
            - ann_info (dict): Annotation info, present when not in
                test mode.
    """
    record = self.data_infos[index]
    # standard protocol modified from SECOND.Pytorch
    sample = {
        'sample_idx': record['token'],
        'pts_filename': record['lidar_path'],
        'sweeps': record['sweeps'],
        'timestamp': record['timestamp'] / 1e6,
    }

    if self.modality['use_camera']:
        img_files = []
        projections = []
        for camera in record['cams'].values():
            img_files.append(camera['data_path'])
            # obtain lidar to image transformation matrix
            rot_inv = np.linalg.inv(camera['sensor2lidar_rotation'])
            shift = camera['sensor2lidar_translation'] @ rot_inv.T
            extrinsic_t = np.eye(4)
            extrinsic_t[:3, :3] = rot_inv.T
            extrinsic_t[3, :3] = -shift
            cam_k = camera['cam_intrinsic']
            # Embed the intrinsic matrix in the top-left of a 4x4 identity.
            padded_k = np.eye(4)
            padded_k[:cam_k.shape[0], :cam_k.shape[1]] = cam_k
            projections.append(padded_k @ extrinsic_t.T)

        sample['img_filename'] = img_files
        sample['lidar2img'] = projections

    if not self.test_mode:
        sample['ann_info'] = self.get_ann_info(index)

    return sample
def get_ann_info(self, index):
    """Collect the ground-truth annotations of one sample.

    Boxes are filtered by the sample's ``valid_flag`` when
    ``self.use_valid_flag`` is set, otherwise by ``num_lidar_pts > 0``.

    Args:
        index (int): Index of the annotation data to get.

    Returns:
        dict: Annotation information consists of the following keys:

            - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
                3D ground truth bboxes, converted to ``self.box_mode_3d``.
            - gt_labels_3d (np.ndarray): Index of each box's class in
                ``self.CLASSES``, or -1 for classes not listed there.
            - gt_names (list[str]): Class names of ground truths.
    """
    sample = self.data_infos[index]

    # filter out bbox containing no points
    keep = (sample['valid_flag']
            if self.use_valid_flag else sample['num_lidar_pts'] > 0)
    boxes = sample['gt_boxes'][keep]
    names = sample['gt_names'][keep]

    known_classes = self.CLASSES
    labels = np.array([
        known_classes.index(cls_name) if cls_name in known_classes else -1
        for cls_name in names
    ])

    if self.with_velocity:
        velocity = sample['gt_velocity'][keep]
        # Rows whose first velocity component is NaN are zeroed out.
        missing = np.isnan(velocity[:, 0])
        velocity[missing] = [0.0, 0.0]
        boxes = np.concatenate([boxes, velocity], axis=-1)

    # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
    # the same as KITTI (0.5, 0.5, 0)
    lidar_boxes = LiDARInstance3DBoxes(
        boxes, box_dim=boxes.shape[-1], origin=(0.5, 0.5, 0.5))
    converted = lidar_boxes.convert_to(self.box_mode_3d)

    return {
        'gt_bboxes_3d': converted,
        'gt_labels_3d': labels,
        'gt_names': names,
    }
def _format_bbox(self, results, jsonfile_prefix=None):
    """Convert the results to the standard format.

    Each detection is turned into nuScenes boxes, moved to the global
    frame, given an attribute name, and written to
    ``<jsonfile_prefix>/results_nusc.json``.

    Args:
        results (list[dict]): Testing results of the dataset.
        jsonfile_prefix (str): Directory the json file is written into;
            it is created if missing. Default: None.

    Returns:
        str: Path of the output json file.
    """
    class_names = self.CLASSES

    # Attribute overrides keyed by class name. A class that is absent from
    # the table chosen for a box falls back to
    # NuScenesDataset.DefaultAttribute.
    attr_when_moving = {
        'car': 'vehicle.moving',
        'construction_vehicle': 'vehicle.moving',
        'bus': 'vehicle.moving',
        'truck': 'vehicle.moving',
        'trailer': 'vehicle.moving',
        'bicycle': 'cycle.with_rider',
        'motorcycle': 'cycle.with_rider',
    }
    attr_when_still = {
        'pedestrian': 'pedestrian.standing',
        'bus': 'vehicle.stopped',
    }

    nusc_annos = {}
    print('Start to convert detection format...')
    for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
        lidar_boxes = output_to_nusc_box(det, self.with_velocity)
        sample_info = self.data_infos[sample_id]
        sample_token = sample_info['token']
        global_boxes = lidar_nusc_box_to_global(
            sample_info, lidar_boxes, class_names,
            self.eval_detection_configs, self.eval_version)

        annos = []
        for box in global_boxes:
            name = class_names[box.label]
            # Planar speed above 0.2 selects the "moving" table.
            planar_speed = np.sqrt(box.velocity[0]**2 + box.velocity[1]**2)
            overrides = (attr_when_moving
                         if planar_speed > 0.2 else attr_when_still)
            if name in overrides:
                attr = overrides[name]
            else:
                attr = NuScenesDataset.DefaultAttribute[name]

            annos.append({
                'sample_token': sample_token,
                'translation': box.center.tolist(),
                'size': box.wlh.tolist(),
                'rotation': box.orientation.elements.tolist(),
                'velocity': box.velocity[:2].tolist(),
                'detection_name': name,
                'detection_score': box.score,
                'attribute_name': attr,
            })
        nusc_annos[sample_token] = annos

    nusc_submissions = {'meta': self.modality, 'results': nusc_annos}

    mmcv.mkdir_or_exist(jsonfile_prefix)
    res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
    print('Results writes to', res_path)
    mmcv.dump(nusc_submissions, res_path)
    return res_path
def _evaluate_single(self,
                     result_path,
                     logger=None,
                     metric='bbox',
                     result_name='pts_bbox'):
    """Evaluation for a single model in nuScenes protocol.

    Runs ``NuScenesEval`` on the result file, then reads the
    ``metrics_summary.json`` it leaves next to that file and flattens it
    into a dict keyed by ``'<result_name>_NuScenes/...'``.

    Args:
        result_path (str): Path of the result file.
        logger (logging.Logger | str, optional): Not used by this
            implementation. Default: None.
        metric (str, optional): Not used by this implementation.
            Default: 'bbox'.
        result_name (str, optional): Result name in the metric prefix.
            Default: 'pts_bbox'.

    Returns:
        dict: Dictionary of evaluation details.
    """
    from nuscenes import NuScenes
    from nuscenes.eval.detection.evaluate import NuScenesEval

    def _round4(value):
        # Round through a 4-decimal string representation.
        return float('{:.4f}'.format(value))

    # The evaluator writes its output into the result file's directory.
    output_dir, _ = osp.split(result_path)

    nusc = NuScenes(
        version=self.version, dataroot=self.data_root, verbose=False)
    split_of_version = {
        'v1.0-mini': 'mini_val',
        'v1.0-trainval': 'val',
    }
    evaluator = NuScenesEval(
        nusc,
        config=self.eval_detection_configs,
        result_path=result_path,
        eval_set=split_of_version[self.version],
        output_dir=output_dir,
        verbose=False)
    evaluator.main(render_curves=False)

    # record metrics
    summary = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
    metric_prefix = f'{result_name}_NuScenes'
    detail = {}
    for cls_name in self.CLASSES:
        for dist, ap in summary['label_aps'][cls_name].items():
            key = '{}/{}_AP_dist_{}'.format(metric_prefix, cls_name, dist)
            detail[key] = _round4(ap)
        for err_name, err in summary['label_tp_errors'][cls_name].items():
            key = '{}/{}_{}'.format(metric_prefix, cls_name, err_name)
            detail[key] = _round4(err)
        # Class-independent errors; rewritten with the same values on
        # every class iteration.
        for err_name, err in summary['tp_errors'].items():
            key = '{}/{}'.format(metric_prefix,
                                 self.ErrNameMapping[err_name])
            detail[key] = _round4(err)

    detail['{}/NDS'.format(metric_prefix)] = summary['nd_score']
    detail['{}/mAP'.format(metric_prefix)] = summary['mean_ap']
    return detail
def format_results(self, results, jsonfile_prefix=None):
    """Format the results to json (standard format for COCO evaluation).

    Args:
        results (list[dict]): Testing results of the dataset.
        jsonfile_prefix (str): The prefix of json files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temporary directory is created.
            Default: None.

    Returns:
        tuple: Returns (result_files, tmp_dir). `result_files` is the
            json filepath when `results` is a flat list of detection
            dicts, or a dict mapping each key of the first result to its
            json filepath when the results are nested under 'pts_bbox' /
            'img_bbox'. `tmp_dir` is the temporary directory created for
            saving json files when `jsonfile_prefix` is not specified,
            otherwise None.
    """
    assert isinstance(results, list), 'results must be a list'
    assert len(results) == len(self), (
        'The length of results is not equal to the dataset len: {} != {}'.
        format(len(results), len(self)))

    tmp_dir = None
    if jsonfile_prefix is None:
        tmp_dir = tempfile.TemporaryDirectory()
        jsonfile_prefix = osp.join(tmp_dir.name, 'results')

    # currently the output prediction results could be in two formats
    # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
    # 2. list of dict('pts_bbox' or 'img_bbox':
    #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
    # this is a workaround to enable evaluation of both formats on nuScenes
    # refer to https://github.com/open-mmlab/mmdetection3d/issues/449
    first = results[0]
    if 'pts_bbox' in first or 'img_bbox' in first:
        # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict
        result_files = {}
        for name in first:
            print(f'\nFormating bboxes of {name}')
            inner_results = [out[name] for out in results]
            sub_prefix = osp.join(jsonfile_prefix, name)
            result_files[name] = self._format_bbox(inner_results,
                                                   sub_prefix)
    else:
        result_files = self._format_bbox(results, jsonfile_prefix)

    return result_files, tmp_dir
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             result_names=['pts_bbox'],
             show=False,
             out_dir=None,
             pipeline=None):
    """Evaluation in nuScenes protocol.

    Args:
        results (list[dict]): Testing results of the dataset.
        metric (str | list[str], optional): Metrics to be evaluated.
            Not used by this implementation. Default: 'bbox'.
        logger (logging.Logger | str, optional): Logger used for printing
            related information during evaluation. Not used by this
            implementation. Default: None.
        jsonfile_prefix (str, optional): The prefix of json files including
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        result_names (list[str], optional): Keys of the formatted result
            files to evaluate when the results are nested. The default
            list is only read, never mutated. Default: ['pts_bbox'].
        show (bool, optional): Whether to visualize.
            Default: False.
        out_dir (str, optional): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict[str, float]: Results of each evaluation metric.

    Raises:
        TypeError: If ``format_results`` yields result files that are
            neither a dict nor a str.
    """
    result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

    try:
        if isinstance(result_files, dict):
            results_dict = dict()
            for name in result_names:
                print('Evaluating bboxes of {}'.format(name))
                # NOTE(review): `name` is not forwarded as `result_name`,
                # so every entry is reported under the default metric
                # prefix and later entries overwrite earlier ones. Kept
                # as-is because forwarding it would rename metric keys.
                results_dict.update(
                    self._evaluate_single(result_files[name]))
        elif isinstance(result_files, str):
            results_dict = self._evaluate_single(result_files)
        else:
            raise TypeError(
                'result_files must be a dict or a str, but got {}'.format(
                    type(result_files).__name__))
    finally:
        # Remove the temporary directory even when evaluation fails, so a
        # crashed run does not leave the formatted results behind.
        if tmp_dir is not None:
            tmp_dir.cleanup()

    if show or out_dir:
        self.show(results, out_dir, show=show, pipeline=pipeline)
    return results_dict
def _build_default_pipeline(self):
    """Build the default pipeline for this dataset.

    Returns:
        Compose: Pipeline that loads the key-frame points and their
            sweeps from disk, bundles them without labels and collects
            the 'points' key.
    """
    load_points = {
        'type': 'LoadPointsFromFile',
        'coord_type': 'LIDAR',
        'load_dim': 5,
        'use_dim': 5,
        'file_client_args': {'backend': 'disk'},
    }
    load_sweeps = {
        'type': 'LoadPointsFromMultiSweeps',
        'sweeps_num': 10,
        # Separate dict on purpose: each step gets its own client args.
        'file_client_args': {'backend': 'disk'},
    }
    format_bundle = {
        'type': 'DefaultFormatBundle3D',
        'class_names': self.CLASSES,
        'with_label': False,
    }
    collect = {'type': 'Collect3D', 'keys': ['points']}
    return Compose([load_points, load_sweeps, format_bundle, collect])
def show(self, results, out_dir, show=False, pipeline=None):
    """Results visualization.

    For every result the sample's points, its ground-truth boxes and the
    predicted boxes scoring above 0.1 are converted from LIDAR to DEPTH
    mode and handed to ``show_result``.

    Args:
        results (list[dict]): List of bounding boxes results. A result
            nested under 'pts_bbox' is unwrapped first.
        out_dir (str): Output directory of visualization result.
        show (bool): Whether to visualize the results online.
            Default: False.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.
    """
    assert out_dir is not None, 'Expect out_dir, got none.'
    pipeline = self._get_pipeline(pipeline)

    src_box_mode, dst_box_mode = None, None
    for idx, result in enumerate(results):
        det = result['pts_bbox'] if 'pts_bbox' in result else result

        # Name the output after the point-cloud file, up to its first dot.
        lidar_path = self.data_infos[idx]['lidar_path']
        file_name = osp.basename(lidar_path).split('.')[0]

        raw_points = self._extract_data(idx, pipeline, 'points').numpy()
        # for now we convert points into depth mode
        depth_points = Coord3DMode.convert_point(
            raw_points, Coord3DMode.LIDAR, Coord3DMode.DEPTH)

        score_mask = det['scores_3d'] > 0.1

        src_box_mode, dst_box_mode = Box3DMode.LIDAR, Box3DMode.DEPTH
        gt_array = self.get_ann_info(idx)['gt_bboxes_3d'].tensor.numpy()
        depth_gt = Box3DMode.convert(gt_array, src_box_mode, dst_box_mode)

        pred_array = det['boxes_3d'][score_mask].tensor.numpy()
        depth_pred = Box3DMode.convert(pred_array, src_box_mode,
                                       dst_box_mode)

        show_result(depth_points, depth_gt, depth_pred, out_dir, file_name,
                    show)
def output_to_nusc_box(detection, with_velocity=True):
    """Convert the output to the box class in the nuScenes.

    Args:
        detection (dict): Detection results.

            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
            - scores_3d (torch.Tensor): Detection scores.
            - labels_3d (torch.Tensor): Predicted box labels.
        with_velocity (bool, optional): Whether to read the velocity from
            columns 7:9 of the box tensor; when False a zero velocity is
            used instead. Default: True.

    Returns:
        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
    """
    pred_boxes = detection['boxes_3d']
    score_arr = detection['scores_3d'].numpy()
    label_arr = detection['labels_3d'].numpy()

    centers = pred_boxes.gravity_center.numpy()
    dims = pred_boxes.dims.numpy()
    yaws = pred_boxes.yaw.numpy()

    # our LiDAR coordinate system -> nuScenes box coordinate system
    # (the first two size columns are swapped)
    nusc_dims = dims[:, [1, 0, 2]]

    nusc_boxes = []
    for idx in range(len(pred_boxes)):
        # Rotation by the box yaw about the z axis.
        orientation = pyquaternion.Quaternion(
            axis=[0, 0, 1], radians=yaws[idx])
        velocity = ((*pred_boxes.tensor[idx, 7:9], 0.0)
                    if with_velocity else (0, 0, 0))
        nusc_boxes.append(
            NuScenesBox(
                centers[idx],
                nusc_dims[idx],
                orientation,
                label=label_arr[idx],
                score=score_arr[idx],
                velocity=velocity))
    return nusc_boxes
def lidar_nusc_box_to_global(info,
                             boxes,
                             classes,
                             eval_configs,
                             eval_version='detection_cvpr_2019'):
    """Move boxes from the lidar frame to the global frame.

    Each box is first moved into the ego frame, dropped if it lies beyond
    the detection range of its class, and otherwise moved on into the
    global frame. The boxes are modified in place.

    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
        classes (list[str]): Mapped classes in the evaluation.
        eval_configs (object): Evaluation configuration object; its
            ``class_range`` maps a class name to its detection range.
        eval_version (str, optional): Evaluation version. Not used by
            this implementation. Default: 'detection_cvpr_2019'

    Returns:
        list: List of standard NuScenesBoxes in the global
            coordinate.
    """
    global_boxes = []
    for nusc_box in boxes:
        # Move box to ego vehicle coord system
        lidar2ego_rot = pyquaternion.Quaternion(info['lidar2ego_rotation'])
        nusc_box.rotate(lidar2ego_rot)
        nusc_box.translate(np.array(info['lidar2ego_translation']))

        # filter det in ego.
        range_by_class = eval_configs.class_range
        planar_dist = np.linalg.norm(nusc_box.center[:2], 2)
        max_dist = range_by_class[classes[nusc_box.label]]
        if planar_dist > max_dist:
            # Too far from the ego vehicle for this class.
            continue

        # Move box to global coord system
        ego2global_rot = pyquaternion.Quaternion(
            info['ego2global_rotation'])
        nusc_box.rotate(ego2global_rot)
        nusc_box.translate(np.array(info['ego2global_translation']))
        global_boxes.append(nusc_box)
    return global_boxes
Prev
1
…
17
18
19
20
21
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment