Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
32a4328b
Unverified
Commit
32a4328b
authored
Feb 24, 2022
by
Wenwei Zhang
Committed by
GitHub
Feb 24, 2022
Browse files
Bump version to V1.0.0rc0
Bump version to V1.0.0rc0
parents
86cc487c
a8817998
Changes
414
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1158 additions
and
278 deletions
+1158
-278
mmdet3d/datasets/lyft_dataset.py
mmdet3d/datasets/lyft_dataset.py
+35
-29
mmdet3d/datasets/nuscenes_dataset.py
mmdet3d/datasets/nuscenes_dataset.py
+34
-29
mmdet3d/datasets/nuscenes_mono_dataset.py
mmdet3d/datasets/nuscenes_mono_dataset.py
+32
-25
mmdet3d/datasets/pipelines/__init__.py
mmdet3d/datasets/pipelines/__init__.py
+14
-11
mmdet3d/datasets/pipelines/data_augment_utils.py
mmdet3d/datasets/pipelines/data_augment_utils.py
+26
-24
mmdet3d/datasets/pipelines/dbsampler.py
mmdet3d/datasets/pipelines/dbsampler.py
+21
-11
mmdet3d/datasets/pipelines/formating.py
mmdet3d/datasets/pipelines/formating.py
+15
-14
mmdet3d/datasets/pipelines/loading.py
mmdet3d/datasets/pipelines/loading.py
+43
-27
mmdet3d/datasets/pipelines/test_time_aug.py
mmdet3d/datasets/pipelines/test_time_aug.py
+13
-11
mmdet3d/datasets/pipelines/transforms_3d.py
mmdet3d/datasets/pipelines/transforms_3d.py
+322
-50
mmdet3d/datasets/s3dis_dataset.py
mmdet3d/datasets/s3dis_dataset.py
+2
-1
mmdet3d/datasets/scannet_dataset.py
mmdet3d/datasets/scannet_dataset.py
+9
-8
mmdet3d/datasets/sunrgbd_dataset.py
mmdet3d/datasets/sunrgbd_dataset.py
+13
-9
mmdet3d/datasets/utils.py
mmdet3d/datasets/utils.py
+6
-6
mmdet3d/datasets/waymo_dataset.py
mmdet3d/datasets/waymo_dataset.py
+22
-21
mmdet3d/models/backbones/__init__.py
mmdet3d/models/backbones/__init__.py
+4
-1
mmdet3d/models/backbones/base_pointnet.py
mmdet3d/models/backbones/base_pointnet.py
+1
-0
mmdet3d/models/backbones/dgcnn.py
mmdet3d/models/backbones/dgcnn.py
+98
-0
mmdet3d/models/backbones/dla.py
mmdet3d/models/backbones/dla.py
+446
-0
mmdet3d/models/backbones/multi_backbone.py
mmdet3d/models/backbones/multi_backbone.py
+2
-1
No files found.
mmdet3d/datasets/lyft_dataset.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
os
import
tempfile
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
os
import
pandas
as
pd
import
tempfile
from
lyft_dataset_sdk.lyftdataset
import
LyftDataset
as
Lyft
from
lyft_dataset_sdk.utils.data_classes
import
Box
as
LyftBox
from
os
import
path
as
osp
from
pyquaternion
import
Quaternion
from
mmdet3d.core.evaluation.lyft_eval
import
lyft_eval
...
...
@@ -129,7 +130,7 @@ class LyftDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
\
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): sample index
...
...
@@ -137,13 +138,13 @@ class LyftDataset(Custom3DDataset):
- sweeps (list[dict]): infos of sweeps
- timestamp (float): sample timestamp
- img_filename (str, optional): image filename
- lidar2img (list[np.ndarray], optional): transformations
\
- lidar2img (list[np.ndarray], optional): transformations
from lidar to different cameras
- ann_info (dict): annotation info
"""
info
=
self
.
data_infos
[
index
]
# standard protoc
a
l modified from SECOND.Pytorch
# standard protoc
o
l modified from SECOND.Pytorch
input_dict
=
dict
(
sample_idx
=
info
[
'token'
],
pts_filename
=
info
[
'lidar_path'
],
...
...
@@ -190,7 +191,7 @@ class LyftDataset(Custom3DDataset):
Returns:
dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
\
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes.
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
...
...
@@ -275,10 +276,11 @@ class LyftDataset(Custom3DDataset):
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str
| None
): Logger used for printing
logger (logging.Logger | str
, optional
): Logger used for printing
related information during evaluation. Default: None.
metric (str): Metric name used for evaluation. Default: 'bbox'.
result_name (str): Result name in the metric prefix.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
...
...
@@ -312,18 +314,18 @@ class LyftDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str
| None
): The prefix of json files. It includes
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
csv_savepath (str
| None
): The path for saving csv files.
csv_savepath (str): The path for saving csv files.
It includes the file path and the csv filename,
e.g., "a/b/filename.csv". If not specified,
the result will not be converted to csv file.
Returns:
tuple: Returns (result_files, tmp_dir), where `result_files` is a
\
dict containing the json filepaths, `tmp_dir` is the temporal
\
directory created for saving json files when
\
tuple: Returns (result_files, tmp_dir), where `result_files` is a
dict containing the json filepaths, `tmp_dir` is the temporal
directory created for saving json files when
`jsonfile_prefix` is not specified.
"""
assert
isinstance
(
results
,
list
),
'results must be a list'
...
...
@@ -372,19 +374,22 @@ class LyftDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
logger (logging.Logger | str | None): Logger used for printing
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str
| None
): The prefix of json files
. It
includ
es
jsonfile_prefix (str
, optional
): The prefix of json files includ
ing
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
csv_savepath (str
| None
): The path for saving csv files.
csv_savepath (str
, optional
): The path for saving csv files.
It includes the file path and the csv filename,
e.g., "a/b/filename.csv". If not specified,
the result will not be converted to csv file.
show (bool): Whether to visualize.
result_names (list[str], optional): Result names in the
metric prefix. Default: ['pts_bbox'].
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
out_dir (str
, optional
): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
...
...
@@ -407,8 +412,8 @@ class LyftDataset(Custom3DDataset):
if
tmp_dir
is
not
None
:
tmp_dir
.
cleanup
()
if
show
:
self
.
show
(
results
,
out_dir
,
pipeline
=
pipeline
)
if
show
or
out_dir
:
self
.
show
(
results
,
out_dir
,
show
=
show
,
pipeline
=
pipeline
)
return
results_dict
def
_build_default_pipeline
(
self
):
...
...
@@ -432,13 +437,14 @@ class LyftDataset(Custom3DDataset):
]
return
Compose
(
pipeline
)
def
show
(
self
,
results
,
out_dir
,
show
=
Tru
e
,
pipeline
=
None
):
def
show
(
self
,
results
,
out_dir
,
show
=
Fals
e
,
pipeline
=
None
):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
...
...
@@ -517,16 +523,16 @@ def output_to_lyft_box(detection):
box_gravity_center
=
box3d
.
gravity_center
.
numpy
()
box_dims
=
box3d
.
dims
.
numpy
()
box_yaw
=
box3d
.
yaw
.
numpy
()
# TODO: check whether this is necessary
#
with dir_offset & dir_limit in the head
box_yaw
=
-
box_
yaw
-
np
.
pi
/
2
#
our LiDAR coordinate system -> Lyft box coordinate system
lyft_box_dims
=
box_
dims
[:,
[
1
,
0
,
2
]]
box_list
=
[]
for
i
in
range
(
len
(
box3d
)):
quat
=
Quaternion
(
axis
=
[
0
,
0
,
1
],
radians
=
box_yaw
[
i
])
box
=
LyftBox
(
box_gravity_center
[
i
],
box_dims
[
i
],
lyft_
box_dims
[
i
],
quat
,
label
=
labels
[
i
],
score
=
scores
[
i
])
...
...
mmdet3d/datasets/nuscenes_dataset.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
tempfile
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
pyquaternion
import
tempfile
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
os
import
path
as
osp
from
mmdet.datasets
import
DATASETS
from
..core
import
show_result
...
...
@@ -48,8 +49,9 @@ class NuScenesDataset(Custom3DDataset):
Defaults to False.
eval_version (bool, optional): Configuration version of evaluation.
Defaults to 'detection_cvpr_2019'.
use_valid_flag (bool): Whether to use `use_valid_flag` key in the info
file as mask to filter gt_boxes and gt_names. Defaults to False.
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
"""
NameMapping
=
{
'movable_object.barrier'
:
'barrier'
,
...
...
@@ -196,7 +198,7 @@ class NuScenesDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
\
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
...
...
@@ -204,12 +206,12 @@ class NuScenesDataset(Custom3DDataset):
- sweeps (list[dict]): Infos of sweeps.
- timestamp (float): Sample timestamp.
- img_filename (str, optional): Image filename.
- lidar2img (list[np.ndarray], optional): Transformations
\
- lidar2img (list[np.ndarray], optional): Transformations
from lidar to different cameras.
- ann_info (dict): Annotation info.
"""
info
=
self
.
data_infos
[
index
]
# standard protoc
a
l modified from SECOND.Pytorch
# standard protoc
o
l modified from SECOND.Pytorch
input_dict
=
dict
(
sample_idx
=
info
[
'token'
],
pts_filename
=
info
[
'lidar_path'
],
...
...
@@ -256,7 +258,7 @@ class NuScenesDataset(Custom3DDataset):
Returns:
dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
\
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
...
...
@@ -374,10 +376,11 @@ class NuScenesDataset(Custom3DDataset):
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str
| None
): Logger used for printing
logger (logging.Logger | str
, optional
): Logger used for printing
related information during evaluation. Default: None.
metric (str): Metric name used for evaluation. Default: 'bbox'.
result_name (str): Result name in the metric prefix.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
...
...
@@ -427,14 +430,14 @@ class NuScenesDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str
| None
): The prefix of json files. It includes
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: Returns (result_files, tmp_dir), where `result_files` is a
\
dict containing the json filepaths, `tmp_dir` is the temporal
\
directory created for saving json files when
\
tuple: Returns (result_files, tmp_dir), where `result_files` is a
dict containing the json filepaths, `tmp_dir` is the temporal
directory created for saving json files when
`jsonfile_prefix` is not specified.
"""
assert
isinstance
(
results
,
list
),
'results must be a list'
...
...
@@ -480,15 +483,16 @@ class NuScenesDataset(Custom3DDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
logger (logging.Logger | str | None): Logger used for printing
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str
| None
): The prefix of json files
. It
includ
es
jsonfile_prefix (str
, optional
): The prefix of json files includ
ing
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
show (bool): Whether to visualize.
show (bool
, optional
): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
out_dir (str
, optional
): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
...
...
@@ -510,8 +514,8 @@ class NuScenesDataset(Custom3DDataset):
if
tmp_dir
is
not
None
:
tmp_dir
.
cleanup
()
if
show
:
self
.
show
(
results
,
out_dir
,
pipeline
=
pipeline
)
if
show
or
out_dir
:
self
.
show
(
results
,
out_dir
,
show
=
show
,
pipeline
=
pipeline
)
return
results_dict
def
_build_default_pipeline
(
self
):
...
...
@@ -535,13 +539,14 @@ class NuScenesDataset(Custom3DDataset):
]
return
Compose
(
pipeline
)
def
show
(
self
,
results
,
out_dir
,
show
=
Tru
e
,
pipeline
=
None
):
def
show
(
self
,
results
,
out_dir
,
show
=
Fals
e
,
pipeline
=
None
):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
...
...
@@ -588,9 +593,9 @@ def output_to_nusc_box(detection):
box_gravity_center
=
box3d
.
gravity_center
.
numpy
()
box_dims
=
box3d
.
dims
.
numpy
()
box_yaw
=
box3d
.
yaw
.
numpy
()
# TODO: check whether this is necessary
#
with dir_offset & dir_limit in the head
box_yaw
=
-
box_
yaw
-
np
.
pi
/
2
#
our LiDAR coordinate system -> nuScenes box coordinate system
nus_box_dims
=
box_
dims
[:,
[
1
,
0
,
2
]]
box_list
=
[]
for
i
in
range
(
len
(
box3d
)):
...
...
@@ -602,7 +607,7 @@ def output_to_nusc_box(detection):
# velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
box
=
NuScenesBox
(
box_gravity_center
[
i
],
box_dims
[
i
],
nus_
box_dims
[
i
],
quat
,
label
=
labels
[
i
],
score
=
scores
[
i
],
...
...
@@ -624,7 +629,7 @@ def lidar_nusc_box_to_global(info,
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str): Evaluation version.
eval_version (str
, optional
): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
...
...
mmdet3d/datasets/nuscenes_mono_dataset.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
tempfile
import
warnings
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
pyquaternion
import
tempfile
import
torch
import
warnings
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
os
import
path
as
osp
from
mmdet3d.core
import
bbox3d2result
,
box3d_multiclass_nms
,
xywhr2xyxyr
from
mmdet.datasets
import
DATASETS
,
CocoDataset
...
...
@@ -44,8 +45,9 @@ class NuScenesMonoDataset(CocoDataset):
- 'Camera': Box in camera coordinates.
eval_version (str, optional): Configuration version of evaluation.
Defaults to 'detection_cvpr_2019'.
use_valid_flag (bool): Whether to use `use_valid_flag` key in the info
file as mask to filter gt_boxes and gt_names. Defaults to False.
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
version (str, optional): Dataset version. Defaults to 'v1.0-trainval'.
"""
CLASSES
=
(
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
...
...
@@ -140,8 +142,8 @@ class NuScenesMonoDataset(CocoDataset):
ann_info (list[dict]): Annotation info of an image.
Returns:
dict: A dict containing the following keys: bboxes, labels,
\
gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d,
\
dict: A dict containing the following keys: bboxes, labels,
gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d,
depths, bboxes_ignore, masks, seg_map
"""
gt_bboxes
=
[]
...
...
@@ -394,10 +396,11 @@ class NuScenesMonoDataset(CocoDataset):
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str
| None
): Logger used for printing
logger (logging.Logger | str
, optional
): Logger used for printing
related information during evaluation. Default: None.
metric (str): Metric name used for evaluation. Default: 'bbox'.
result_name (str): Result name in the metric prefix.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'img_bbox'.
Returns:
...
...
@@ -448,13 +451,13 @@ class NuScenesMonoDataset(CocoDataset):
Args:
results (list[tuple | numpy.ndarray]): Testing results of the
dataset.
jsonfile_prefix (str
| None
): The prefix of json files. It includes
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
\
the json filepaths, tmp_dir is the temporal directory created
\
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporal directory created
for saving json files when jsonfile_prefix is not specified.
"""
assert
isinstance
(
results
,
list
),
'results must be a list'
...
...
@@ -504,15 +507,18 @@ class NuScenesMonoDataset(CocoDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
logger (logging.Logger | str | None): Logger used for printing
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str
| None
): The prefix of json files. It includes
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
show (bool): Whether to visualize.
result_names (list[str], optional): Result names in the
metric prefix. Default: ['img_bbox'].
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
out_dir (str
, optional
): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
...
...
@@ -535,7 +541,7 @@ class NuScenesMonoDataset(CocoDataset):
if
tmp_dir
is
not
None
:
tmp_dir
.
cleanup
()
if
show
:
if
show
or
out_dir
:
self
.
show
(
results
,
out_dir
,
pipeline
=
pipeline
)
return
results_dict
...
...
@@ -576,7 +582,7 @@ class NuScenesMonoDataset(CocoDataset):
"""Get data loading pipeline in self.show/evaluate function.
Args:
pipeline (list[dict]
| None
): Input pipeline. If None is given,
\
pipeline (list[dict]): Input pipeline. If None is given,
get from self.pipeline.
"""
if
pipeline
is
None
:
...
...
@@ -601,13 +607,14 @@ class NuScenesMonoDataset(CocoDataset):
]
return
Compose
(
pipeline
)
def
show
(
self
,
results
,
out_dir
,
show
=
Tru
e
,
pipeline
=
None
):
def
show
(
self
,
results
,
out_dir
,
show
=
Fals
e
,
pipeline
=
None
):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
...
...
@@ -696,7 +703,7 @@ def cam_nusc_box_to_global(info,
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str): Evaluation version.
eval_version (str
, optional
): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
...
...
@@ -736,7 +743,7 @@ def global_nusc_box_to_cam(info,
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str): Evaluation version.
eval_version (str
, optional
): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
...
...
@@ -769,7 +776,7 @@ def nusc_box_to_cam_box3d(boxes):
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
Returns:
tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
\
tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
Converted 3D bounding boxes, scores and labels.
"""
locs
=
torch
.
Tensor
([
b
.
center
for
b
in
boxes
]).
view
(
-
1
,
3
)
...
...
mmdet3d/datasets/pipelines/__init__.py
View file @
32a4328b
...
...
@@ -3,17 +3,19 @@ from mmdet.datasets.pipelines import Compose
from
.dbsampler
import
DataBaseSampler
from
.formating
import
Collect3D
,
DefaultFormatBundle
,
DefaultFormatBundle3D
from
.loading
import
(
LoadAnnotations3D
,
LoadImageFromFileMono3D
,
LoadMultiViewImageFromFiles
,
LoadPointsFrom
File
,
LoadPointsFrom
MultiSweeps
,
NormalizePointsColor
,
PointSegClassMapping
)
LoadMultiViewImageFromFiles
,
LoadPointsFrom
Dict
,
LoadPointsFrom
File
,
LoadPointsFromMultiSweeps
,
NormalizePointsColor
,
PointSegClassMapping
)
from
.test_time_aug
import
MultiScaleFlipAug3D
from
.transforms_3d
import
(
BackgroundPointsFilter
,
GlobalAlignment
,
GlobalRotScaleTrans
,
IndoorPatchPointSample
,
IndoorPointSample
,
ObjectNameFilter
,
ObjectNoise
,
ObjectRangeFilter
,
ObjectSample
,
PointSample
,
PointShuffle
,
PointsRangeFilter
,
RandomDropPointsColor
,
RandomFlip3D
,
RandomJitterPoints
,
VoxelBasedPointSampler
)
# yapf: disable
from
.transforms_3d
import
(
AffineResize
,
BackgroundPointsFilter
,
GlobalAlignment
,
GlobalRotScaleTrans
,
IndoorPatchPointSample
,
IndoorPointSample
,
ObjectNameFilter
,
ObjectNoise
,
ObjectRangeFilter
,
ObjectSample
,
PointSample
,
PointShuffle
,
PointsRangeFilter
,
RandomDropPointsColor
,
RandomFlip3D
,
RandomJitterPoints
,
RandomShiftScale
,
VoxelBasedPointSampler
)
__all__
=
[
'ObjectSample'
,
'RandomFlip3D'
,
'ObjectNoise'
,
'GlobalRotScaleTrans'
,
...
...
@@ -25,5 +27,6 @@ __all__ = [
'LoadPointsFromMultiSweeps'
,
'BackgroundPointsFilter'
,
'VoxelBasedPointSampler'
,
'GlobalAlignment'
,
'IndoorPatchPointSample'
,
'LoadImageFromFileMono3D'
,
'ObjectNameFilter'
,
'RandomDropPointsColor'
,
'RandomJitterPoints'
'RandomJitterPoints'
,
'AffineResize'
,
'RandomShiftScale'
,
'LoadPointsFromDict'
]
mmdet3d/datasets/pipelines/data_augment_utils.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
import
numba
import
numpy
as
np
import
warnings
from
numba.errors
import
NumbaPerformanceWarning
from
numba.core.errors
import
NumbaPerformanceWarning
from
mmdet3d.core.bbox
import
box_np_ops
...
...
@@ -21,8 +22,8 @@ def _rotation_box2d_jit_(corners, angle, rot_mat_T):
rot_sin
=
np
.
sin
(
angle
)
rot_cos
=
np
.
cos
(
angle
)
rot_mat_T
[
0
,
0
]
=
rot_cos
rot_mat_T
[
0
,
1
]
=
-
rot_sin
rot_mat_T
[
1
,
0
]
=
rot_sin
rot_mat_T
[
0
,
1
]
=
rot_sin
rot_mat_T
[
1
,
0
]
=
-
rot_sin
rot_mat_T
[
1
,
1
]
=
rot_cos
corners
[:]
=
corners
@
rot_mat_T
...
...
@@ -34,8 +35,8 @@ def box_collision_test(boxes, qboxes, clockwise=True):
Args:
boxes (np.ndarray): Corners of current boxes.
qboxes (np.ndarray): Boxes to be avoid colliding.
clockwise (bool): Whether the corners are in
clockwise order.
Default: True.
clockwise (bool
, optional
): Whether the corners are in
clockwise order.
Default: True.
"""
N
=
boxes
.
shape
[
0
]
K
=
qboxes
.
shape
[
0
]
...
...
@@ -211,8 +212,8 @@ def noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises,
rot_sin
=
np
.
sin
(
current_box
[
0
,
-
1
])
rot_cos
=
np
.
cos
(
current_box
[
0
,
-
1
])
rot_mat_T
[
0
,
0
]
=
rot_cos
rot_mat_T
[
0
,
1
]
=
-
rot_sin
rot_mat_T
[
1
,
0
]
=
rot_sin
rot_mat_T
[
0
,
1
]
=
rot_sin
rot_mat_T
[
1
,
0
]
=
-
rot_sin
rot_mat_T
[
1
,
1
]
=
rot_cos
current_corners
[:]
=
current_box
[
0
,
2
:
4
]
*
corners_norm
@
rot_mat_T
+
current_box
[
0
,
:
2
]
...
...
@@ -264,18 +265,18 @@ def _rotation_matrix_3d_(rot_mat_T, angle, axis):
rot_mat_T
[:]
=
np
.
eye
(
3
)
if
axis
==
1
:
rot_mat_T
[
0
,
0
]
=
rot_cos
rot_mat_T
[
0
,
2
]
=
-
rot_sin
rot_mat_T
[
2
,
0
]
=
rot_sin
rot_mat_T
[
0
,
2
]
=
rot_sin
rot_mat_T
[
2
,
0
]
=
-
rot_sin
rot_mat_T
[
2
,
2
]
=
rot_cos
elif
axis
==
2
or
axis
==
-
1
:
rot_mat_T
[
0
,
0
]
=
rot_cos
rot_mat_T
[
0
,
1
]
=
-
rot_sin
rot_mat_T
[
1
,
0
]
=
rot_sin
rot_mat_T
[
0
,
1
]
=
rot_sin
rot_mat_T
[
1
,
0
]
=
-
rot_sin
rot_mat_T
[
1
,
1
]
=
rot_cos
elif
axis
==
0
:
rot_mat_T
[
1
,
1
]
=
rot_cos
rot_mat_T
[
1
,
2
]
=
-
rot_sin
rot_mat_T
[
2
,
1
]
=
rot_sin
rot_mat_T
[
1
,
2
]
=
rot_sin
rot_mat_T
[
2
,
1
]
=
-
rot_sin
rot_mat_T
[
2
,
2
]
=
rot_cos
...
...
@@ -317,7 +318,7 @@ def box3d_transform_(boxes, loc_transform, rot_transform, valid_mask):
boxes (np.ndarray): 3D boxes to be transformed.
loc_transform (np.ndarray): Location transform to be applied.
rot_transform (np.ndarray): Rotation transform to be applied.
valid_mask (np.ndarray
| None
): Mask to indicate which boxes are valid.
valid_mask (np.ndarray): Mask to indicate which boxes are valid.
"""
num_box
=
boxes
.
shape
[
0
]
for
i
in
range
(
num_box
):
...
...
@@ -338,16 +339,17 @@ def noise_per_object_v3_(gt_boxes,
Args:
gt_boxes (np.ndarray): Ground truth boxes with shape (N, 7).
points (np.ndarray | None): Input point cloud with shape (M, 4).
Default: None.
valid_mask (np.ndarray | None): Mask to indicate which boxes are valid.
Default: None.
rotation_perturb (float): Rotation perturbation. Default: pi / 4.
center_noise_std (float): Center noise standard deviation.
points (np.ndarray, optional): Input point cloud with
shape (M, 4). Default: None.
valid_mask (np.ndarray, optional): Mask to indicate which
boxes are valid. Default: None.
rotation_perturb (float, optional): Rotation perturbation.
Default: pi / 4.
center_noise_std (float, optional): Center noise standard deviation.
Default: 1.0.
global_random_rot_range (float): Global random rotation
range.
Default: pi/4.
num_try (int): Number of try. Default: 100.
global_random_rot_range (float
, optional
): Global random rotation
range.
Default: pi/4.
num_try (int
, optional
): Number of try. Default: 100.
"""
num_boxes
=
gt_boxes
.
shape
[
0
]
if
not
isinstance
(
rotation_perturb
,
(
list
,
tuple
,
np
.
ndarray
)):
...
...
mmdet3d/datasets/pipelines/dbsampler.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
os
import
mmcv
import
numpy
as
np
import
os
from
mmdet3d.core.bbox
import
box_np_ops
from
mmdet3d.datasets.pipelines
import
data_augment_utils
...
...
@@ -15,10 +16,10 @@ class BatchSampler:
Args:
sample_list (list[dict]): List of samples.
name (str
| None
): The category of samples. Default: None.
epoch (int
| None
): Sampling epoch. Default: None.
shuffle (bool): Whether to shuffle indices. Default: False.
drop_reminder (bool): Drop reminder. Default: False.
name (str
, optional
): The category of samples. Default: None.
epoch (int
, optional
): Sampling epoch. Default: None.
shuffle (bool
, optional
): Whether to shuffle indices. Default: False.
drop_reminder (bool
, optional
): Drop reminder. Default: False.
"""
def
__init__
(
self
,
...
...
@@ -87,9 +88,9 @@ class DataBaseSampler(object):
rate (float): Rate of actual sampled over maximum sampled number.
prepare (dict): Name of preparation functions and the input value.
sample_groups (dict): Sampled classes and numbers.
classes (list[str]): List of classes. Default: None.
points_loader(dict): Config of points loader. Default:
dict(
type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])
classes (list[str]
, optional
): List of classes. Default: None.
points_loader(dict
, optional
): Config of points loader. Default:
dict(
type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])
"""
def
__init__
(
self
,
...
...
@@ -188,7 +189,7 @@ class DataBaseSampler(object):
db_infos
[
name
]
=
filtered_infos
return
db_infos
def
sample_all
(
self
,
gt_bboxes
,
gt_labels
,
img
=
None
):
def
sample_all
(
self
,
gt_bboxes
,
gt_labels
,
img
=
None
,
ground_plane
=
None
):
"""Sampling all categories of bboxes.
Args:
...
...
@@ -198,9 +199,9 @@ class DataBaseSampler(object):
Returns:
dict: Dict of sampled 'pseudo ground truths'.
- gt_labels_3d (np.ndarray): ground truths labels
\
- gt_labels_3d (np.ndarray): ground truths labels
of sampled objects.
- gt_bboxes_3d (:obj:`BaseInstance3DBoxes`):
\
- gt_bboxes_3d (:obj:`BaseInstance3DBoxes`):
sampled ground truth 3D bounding boxes
- points (np.ndarray): sampled points
- group_ids (np.ndarray): ids of sampled ground truths
...
...
@@ -263,6 +264,15 @@ class DataBaseSampler(object):
gt_labels
=
np
.
array
([
self
.
cat2label
[
s
[
'name'
]]
for
s
in
sampled
],
dtype
=
np
.
long
)
if
ground_plane
is
not
None
:
xyz
=
sampled_gt_bboxes
[:,
:
3
]
dz
=
(
ground_plane
[:
3
][
None
,
:]
*
xyz
).
sum
(
-
1
)
+
ground_plane
[
3
]
sampled_gt_bboxes
[:,
2
]
-=
dz
for
i
,
s_points
in
enumerate
(
s_points_list
):
s_points
.
tensor
[:,
2
].
sub_
(
dz
[
i
])
ret
=
{
'gt_labels_3d'
:
gt_labels
,
...
...
mmdet3d/datasets/pipelines/formating.py
View file @
32a4328b
...
...
@@ -24,7 +24,7 @@ class DefaultFormatBundle(object):
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
- gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
\
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
(3)to DataContainer (stack=True)
"""
...
...
@@ -92,8 +92,8 @@ class Collect3D(object):
The "img_meta" item is always populated. The contents of the "img_meta"
dictionary depends on "meta_keys". By default this includes:
- 'img_shape': shape of the image input to the network as a tuple
\
(h, w, c). Note that images may be zero padded on the
\
- 'img_shape': shape of the image input to the network as a tuple
(h, w, c). Note that images may be zero padded on the
bottom/right if the batch tensor is larger than this shape.
- 'scale_factor': a float indicating the preprocessing scale
- 'flip': a boolean indicating if image flip transform was used
...
...
@@ -103,9 +103,9 @@ class Collect3D(object):
- 'lidar2img': transform from lidar to image
- 'depth2img': transform from depth to image
- 'cam2img': transform from camera to image
- 'pcd_horizontal_flip': a boolean indicating if point cloud is
\
- 'pcd_horizontal_flip': a boolean indicating if point cloud is
flipped horizontally
- 'pcd_vertical_flip': a boolean indicating if point cloud is
\
- 'pcd_vertical_flip': a boolean indicating if point cloud is
flipped vertically
- 'box_mode_3d': 3D box mode
- 'box_type_3d': 3D box type
...
...
@@ -130,15 +130,16 @@ class Collect3D(object):
'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename')
"""
def
__init__
(
self
,
def
__init__
(
self
,
keys
,
meta_keys
=
(
'filename'
,
'ori_shape'
,
'img_shape'
,
'lidar2img'
,
'depth2img'
,
'cam2img'
,
'pad_shape'
,
'sc
al
e
_
factor'
,
'
flip'
,
'pcd_
horizont
al_flip'
,
'pcd_vertical_flip'
,
'box_mode_3d'
,
'box_type_3d
'
,
'img_norm_cfg'
,
'pcd_trans'
,
'sample_idx
'
,
'pcd_scale_factor'
,
'pcd_rotation'
,
'pts_filename
'
,
'transformation_3d_flow
'
)):
'depth2img'
,
'cam2img'
,
'pad_shape'
,
'scale_factor'
,
'flip'
,
'pcd_horizont
al_flip'
,
'pcd_
vertic
al_flip'
,
'box_mode_3d'
,
'box_type_3d'
,
'img_norm_cfg'
,
'pcd_trans'
,
'sample_idx
'
,
'pcd_scale_factor'
,
'pcd_rotation'
,
'pcd_rotation_angle
'
,
'pts_filename'
,
'transformation_3d_flow'
,
'trans_mat
'
,
'affine_aug
'
)):
self
.
keys
=
keys
self
.
meta_keys
=
meta_keys
...
...
mmdet3d/datasets/pipelines/loading.py
View file @
32a4328b
...
...
@@ -14,9 +14,10 @@ class LoadMultiViewImageFromFiles(object):
Expects results['img_filename'] to be a list of filenames.
Args:
to_float32 (bool): Whether to convert the img to float32.
to_float32 (bool
, optional
): Whether to convert the img to float32.
Defaults to False.
color_type (str): Color type of the file. Defaults to 'unchanged'.
color_type (str, optional): Color type of the file.
Defaults to 'unchanged'.
"""
def
__init__
(
self
,
to_float32
=
False
,
color_type
=
'unchanged'
):
...
...
@@ -30,7 +31,7 @@ class LoadMultiViewImageFromFiles(object):
results (dict): Result dict containing multi-view image filenames.
Returns:
dict: The result dict containing the multi-view image data.
\
dict: The result dict containing the multi-view image data.
Added keys and values are described below.
- filename (str): Multi-view image filenames.
...
...
@@ -48,7 +49,7 @@ class LoadMultiViewImageFromFiles(object):
if
self
.
to_float32
:
img
=
img
.
astype
(
np
.
float32
)
results
[
'filename'
]
=
filename
# unravel to list, see `DefaultFormatBundle` in formating.py
# unravel to list, see `DefaultFormatBundle` in format
t
ing.py
# which will transpose each image separately and then stack into array
results
[
'img'
]
=
[
img
[...,
i
]
for
i
in
range
(
img
.
shape
[
-
1
])]
results
[
'img_shape'
]
=
img
.
shape
...
...
@@ -77,7 +78,7 @@ class LoadImageFromFileMono3D(LoadImageFromFile):
detection, additional camera parameters need to be loaded.
Args:
kwargs (dict): Arguments are the same as those in
\
kwargs (dict): Arguments are the same as those in
:class:`LoadImageFromFile`.
"""
...
...
@@ -102,17 +103,20 @@ class LoadPointsFromMultiSweeps(object):
This is usually used for nuScenes dataset to utilize previous sweeps.
Args:
sweeps_num (int): Number of sweeps. Defaults to 10.
load_dim (int): Dimension number of the loaded points. Defaults to 5.
use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4].
file_client_args (dict): Config dict of file clients, refer to
sweeps_num (int, optional): Number of sweeps. Defaults to 10.
load_dim (int, optional): Dimension number of the loaded points.
Defaults to 5.
use_dim (list[int], optional): Which dimension to use.
Defaults to [0, 1, 2, 4].
file_client_args (dict, optional): Config dict of file clients,
refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
pad_empty_sweeps (bool): Whether to repeat keyframe when
pad_empty_sweeps (bool
, optional
): Whether to repeat keyframe when
sweeps is empty. Defaults to False.
remove_close (bool): Whether to remove close points.
remove_close (bool
, optional
): Whether to remove close points.
Defaults to False.
test_mode (bool): If test_mode
l
=True
used for testing
, it will not
test_mode (bool
, optional
): If
`
test_mode=True
`
, it will not
randomly sample sweeps but select the nearest N frames.
Defaults to False.
"""
...
...
@@ -161,7 +165,7 @@ class LoadPointsFromMultiSweeps(object):
Args:
points (np.ndarray | :obj:`BasePoints`): Sweep points.
radius (float): Radius below which points are removed.
radius (float
, optional
): Radius below which points are removed.
Defaults to 1.0.
Returns:
...
...
@@ -182,14 +186,14 @@ class LoadPointsFromMultiSweeps(object):
"""Call function to load multi-sweep point clouds from files.
Args:
results (dict): Result dict containing multi-sweep point cloud
\
results (dict): Result dict containing multi-sweep point cloud
filenames.
Returns:
dict: The result dict containing the multi-sweep points data.
\
dict: The result dict containing the multi-sweep points data.
Added key and value are described below.
- points (np.ndarray | :obj:`BasePoints`): Multi-sweep point
\
- points (np.ndarray | :obj:`BasePoints`): Multi-sweep point
cloud arrays.
"""
points
=
results
[
'points'
]
...
...
@@ -243,8 +247,8 @@ class PointSegClassMapping(object):
Args:
valid_cat_ids (tuple[int]): A tuple of valid category.
max_cat_id (int): The max possible cat_id in input
segmentation mask.
Defaults to 40.
max_cat_id (int
, optional
): The max possible cat_id in input
segmentation mask.
Defaults to 40.
"""
def
__init__
(
self
,
valid_cat_ids
,
max_cat_id
=
40
):
...
...
@@ -268,7 +272,7 @@ class PointSegClassMapping(object):
results (dict): Result dict containing point semantic masks.
Returns:
dict: The result dict containing the mapped category ids.
\
dict: The result dict containing the mapped category ids.
Updated key and value are described below.
- pts_semantic_mask (np.ndarray): Mapped semantic masks.
...
...
@@ -307,7 +311,7 @@ class NormalizePointsColor(object):
results (dict): Result dict containing point clouds data.
Returns:
dict: The result dict containing the normalized points.
\
dict: The result dict containing the normalized points.
Updated key and value are described below.
- points (:obj:`BasePoints`): Points after color normalization.
...
...
@@ -334,7 +338,7 @@ class NormalizePointsColor(object):
class
LoadPointsFromFile
(
object
):
"""Load Points From File.
Load
sunrgbd and scannet
points from file.
Load points from file.
Args:
coord_type (str): The type of coordinates of points cloud.
...
...
@@ -342,14 +346,17 @@ class LoadPointsFromFile(object):
- 'LIDAR': Points in LiDAR coordinates.
- 'DEPTH': Points in depth coordinates, usually for indoor dataset.
- 'CAMERA': Points in camera coordinates.
load_dim (int): The dimension of the loaded points.
load_dim (int
, optional
): The dimension of the loaded points.
Defaults to 6.
use_dim (list[int]): Which dimensions of the points to
be
use
d
.
use_dim (list[int]
, optional
): Which dimensions of the points to use.
Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension.
shift_height (bool): Whether to use shifted height. Defaults to False.
use_color (bool): Whether to use color features. Defaults to False.
file_client_args (dict): Config dict of file clients, refer to
shift_height (bool, optional): Whether to use shifted height.
Defaults to False.
use_color (bool, optional): Whether to use color features.
Defaults to False.
file_client_args (dict, optional): Config dict of file clients,
refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
"""
...
...
@@ -405,7 +412,7 @@ class LoadPointsFromFile(object):
results (dict): Result dict containing point clouds data.
Returns:
dict: The result dict containing the point clouds data.
\
dict: The result dict containing the point clouds data.
Added key and value are described below.
- points (:obj:`BasePoints`): Point clouds data.
...
...
@@ -453,6 +460,15 @@ class LoadPointsFromFile(object):
return
repr_str
@
PIPELINES
.
register_module
()
class LoadPointsFromDict(LoadPointsFromFile):
    """Load Points From Dict.

    Unlike the parent class, no file I/O is performed: the points are
    expected to be preloaded under the 'points' key of the results dict.
    """

    def __call__(self, results):
        # The caller must have placed the points into `results` already;
        # this step only validates their presence and passes them through.
        assert 'points' in results
        return results
@
PIPELINES
.
register_module
()
class
LoadAnnotations3D
(
LoadAnnotations
):
"""Load Annotations3D.
...
...
mmdet3d/datasets/pipelines/test_time_aug.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
warnings
from
copy
import
deepcopy
import
mmcv
from
mmdet.datasets.builder
import
PIPELINES
from
mmdet.datasets.pipelines
import
Compose
...
...
@@ -16,18 +17,19 @@ class MultiScaleFlipAug3D(object):
img_scale (tuple | list[tuple]): Image scales for resizing.
pts_scale_ratio (float | list[float]): Points scale ratios for
resizing.
flip (bool): Whether apply flip augmentation. Defaults to False.
flip_direction (str | list[str]): Flip augmentation directions
for images, options are "horizontal" and "vertical".
flip (bool, optional): Whether apply flip augmentation.
Defaults to False.
flip_direction (str | list[str], optional): Flip augmentation
directions for images, options are "horizontal" and "vertical".
If flip_direction is list, multiple flip augmentations will
be applied. It has no effect when ``flip == False``.
Defaults to "horizontal".
pcd_horizontal_flip (bool): Whether apply horizontal
flip augmentation
to point cloud. Defaults to True.
Note that it works only when
'flip' is turned on.
pcd_vertical_flip (bool): Whether apply vertical flip
augmentation
to point cloud. Defaults to True.
Note that it works only when
'flip' is turned on.
pcd_horizontal_flip (bool
, optional
): Whether apply horizontal
flip augmentation
to point cloud. Defaults to True.
Note that it works only when
'flip' is turned on.
pcd_vertical_flip (bool
, optional
): Whether apply vertical flip
augmentation
to point cloud. Defaults to True.
Note that it works only when
'flip' is turned on.
"""
def
__init__
(
self
,
...
...
@@ -70,7 +72,7 @@ class MultiScaleFlipAug3D(object):
results (dict): Result dict contains the data to augment.
Returns:
dict: The result dict contains the data that is augmented with
\
dict: The result dict contains the data that is augmented with
different scales and flips.
"""
aug_data
=
[]
...
...
mmdet3d/datasets/pipelines/transforms_3d.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
import
random
import
warnings
import
cv2
import
numpy
as
np
from
mmcv
import
is_tuple_of
from
mmcv.utils
import
build_from_cfg
...
...
@@ -22,7 +25,7 @@ class RandomDropPointsColor(object):
util/transform.py#L223>`_ for more details.
Args:
drop_ratio (float): The probability of dropping point colors.
drop_ratio (float
, optional
): The probability of dropping point colors.
Defaults to 0.2.
"""
...
...
@@ -38,7 +41,7 @@ class RandomDropPointsColor(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after color dropping,
\
dict: Results after color dropping,
'points' key is updated in the result dict.
"""
points
=
input_dict
[
'points'
]
...
...
@@ -105,10 +108,11 @@ class RandomFlip3D(RandomFlip):
Args:
input_dict (dict): Result dict from loading pipeline.
direction (str): Flip direction. Default: horizontal.
direction (str, optional): Flip direction.
Default: 'horizontal'.
Returns:
dict: Flipped results, 'points', 'bbox3d_fields' keys are
\
dict: Flipped results, 'points', 'bbox3d_fields' keys are
updated in the result dict.
"""
assert
direction
in
[
'horizontal'
,
'vertical'
]
...
...
@@ -141,15 +145,15 @@ class RandomFlip3D(RandomFlip):
input_dict
[
'cam2img'
][
0
][
2
]
=
w
-
input_dict
[
'cam2img'
][
0
][
2
]
def
__call__
(
self
,
input_dict
):
"""Call function to flip points, values in the ``bbox3d_fields`` and
\
"""Call function to flip points, values in the ``bbox3d_fields`` and
also flip 2D image and its annotations.
Args:
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Flipped results, 'flip', 'flip_direction',
\
'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added
\
dict: Flipped results, 'flip', 'flip_direction',
'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added
into result dict.
"""
# flip 2D image and its annotations
...
...
@@ -191,20 +195,20 @@ class RandomFlip3D(RandomFlip):
class
RandomJitterPoints
(
object
):
"""Randomly jitter point coordinates.
Different from the global translation in ``GlobalRotScaleTrans``, here we
\
Different from the global translation in ``GlobalRotScaleTrans``, here we
apply different noises to each point in a scene.
Args:
jitter_std (list[float]): The standard deviation of jittering noise.
This applies random noise to all points in a 3D scene, which is
\
sampled from a gaussian distribution whose standard deviation is
\
This applies random noise to all points in a 3D scene, which is
sampled from a gaussian distribution whose standard deviation is
set by ``jitter_std``. Defaults to [0.01, 0.01, 0.01]
clip_range (list[float]
| None
): Clip the randomly generated jitter
\
clip_range (list[float]): Clip the randomly generated jitter
noise into this range. If None is given, don't perform clipping.
Defaults to [-0.05, 0.05]
Note:
This transform should only be used in point cloud segmentation tasks
\
This transform should only be used in point cloud segmentation tasks
because we don't transform ground-truth bboxes accordingly.
For similar transform in detection task, please refer to `ObjectNoise`.
"""
...
...
@@ -233,7 +237,7 @@ class RandomJitterPoints(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after adding noise to each point,
\
dict: Results after adding noise to each point,
'points' key is updated in the result dict.
"""
points
=
input_dict
[
'points'
]
...
...
@@ -264,14 +268,17 @@ class ObjectSample(object):
sample_2d (bool): Whether to also paste 2D image patch to the images
This should be true when applying multi-modality cut-and-paste.
Defaults to False.
use_ground_plane (bool): Whether to use gound plane to adjust the
3D labels.
"""
def
__init__
(
self
,
db_sampler
,
sample_2d
=
False
):
def
__init__
(
self
,
db_sampler
,
sample_2d
=
False
,
use_ground_plane
=
False
):
self
.
sampler_cfg
=
db_sampler
self
.
sample_2d
=
sample_2d
if
'type'
not
in
db_sampler
.
keys
():
db_sampler
[
'type'
]
=
'DataBaseSampler'
self
.
db_sampler
=
build_from_cfg
(
db_sampler
,
OBJECTSAMPLERS
)
self
.
use_ground_plane
=
use_ground_plane
@
staticmethod
def
remove_points_in_boxes
(
points
,
boxes
):
...
...
@@ -295,13 +302,18 @@ class ObjectSample(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after object sampling augmentation,
\
'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated
\
dict: Results after object sampling augmentation,
'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated
in the result dict.
"""
gt_bboxes_3d
=
input_dict
[
'gt_bboxes_3d'
]
gt_labels_3d
=
input_dict
[
'gt_labels_3d'
]
if
self
.
use_ground_plane
and
'plane'
in
input_dict
[
'ann_info'
]:
ground_plane
=
input_dict
[
'ann_info'
][
'plane'
]
input_dict
[
'plane'
]
=
ground_plane
else
:
ground_plane
=
None
# change to float for blending operation
points
=
input_dict
[
'points'
]
if
self
.
sample_2d
:
...
...
@@ -315,7 +327,10 @@ class ObjectSample(object):
img
=
img
)
else
:
sampled_dict
=
self
.
db_sampler
.
sample_all
(
gt_bboxes_3d
.
tensor
.
numpy
(),
gt_labels_3d
,
img
=
None
)
gt_bboxes_3d
.
tensor
.
numpy
(),
gt_labels_3d
,
img
=
None
,
ground_plane
=
ground_plane
)
if
sampled_dict
is
not
None
:
sampled_gt_bboxes_3d
=
sampled_dict
[
'gt_bboxes_3d'
]
...
...
@@ -392,13 +407,13 @@ class ObjectNoise(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after adding noise to each object,
\
dict: Results after adding noise to each object,
'points', 'gt_bboxes_3d' keys are updated in the result dict.
"""
gt_bboxes_3d
=
input_dict
[
'gt_bboxes_3d'
]
points
=
input_dict
[
'points'
]
# TODO:
check th
is inplace
func
tion
# TODO:
this
is inplace
opera
tion
numpy_box
=
gt_bboxes_3d
.
tensor
.
numpy
()
numpy_points
=
points
.
tensor
.
numpy
()
...
...
@@ -432,10 +447,10 @@ class GlobalAlignment(object):
rotation_axis (int): Rotation axis for points and bboxes rotation.
Note:
We do not record the applied rotation and translation as in
\
GlobalRotScaleTrans. Because usually, we do not need to reverse
\
We do not record the applied rotation and translation as in
GlobalRotScaleTrans. Because usually, we do not need to reverse
the alignment step.
For example, ScanNet 3D detection task uses aligned ground-truth
\
For example, ScanNet 3D detection task uses aligned ground-truth
bounding boxes for evaluation.
"""
...
...
@@ -487,7 +502,7 @@ class GlobalAlignment(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after global alignment, 'points' and keys in
\
dict: Results after global alignment, 'points' and keys in
input_dict['bbox3d_fields'] are updated in the result dict.
"""
assert
'axis_align_matrix'
in
input_dict
[
'ann_info'
].
keys
(),
\
...
...
@@ -516,15 +531,15 @@ class GlobalRotScaleTrans(object):
"""Apply global rotation, scaling and translation to a 3D scene.
Args:
rot_range (list[float]): Range of rotation angle.
rot_range (list[float]
, optional
): Range of rotation angle.
Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).
scale_ratio_range (list[float]): Range of scale ratio.
scale_ratio_range (list[float]
, optional
): Range of scale ratio.
Defaults to [0.95, 1.05].
translation_std (list[float]): The standard deviation of
translation
noise. Th
is applie
s random translation to a scene by a nois
e, which
translation_std (list[float]
, optional
): The standard deviation of
translation no
is
e
applie
d to a scen
e, which
is sampled from a gaussian distribution whose standard deviation
is set by ``translation_std``. Defaults to [0, 0, 0]
shift_height (bool): Whether to shift height.
shift_height (bool
, optional
): Whether to shift height.
(the fourth dimension of indoor points) when scaling.
Defaults to False.
"""
...
...
@@ -563,8 +578,8 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after translation, 'points', 'pcd_trans'
\
and keys in input_dict['bbox3d_fields'] are updated
\
dict: Results after translation, 'points', 'pcd_trans'
and keys in input_dict['bbox3d_fields'] are updated
in the result dict.
"""
translation_std
=
np
.
array
(
self
.
translation_std
,
dtype
=
np
.
float32
)
...
...
@@ -582,8 +597,8 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after rotation, 'points', 'pcd_rotation'
\
and keys in input_dict['bbox3d_fields'] are updated
\
dict: Results after rotation, 'points', 'pcd_rotation'
and keys in input_dict['bbox3d_fields'] are updated
in the result dict.
"""
rotation
=
self
.
rot_range
...
...
@@ -593,6 +608,7 @@ class GlobalRotScaleTrans(object):
if
len
(
input_dict
[
'bbox3d_fields'
])
==
0
:
rot_mat_T
=
input_dict
[
'points'
].
rotate
(
noise_rotation
)
input_dict
[
'pcd_rotation'
]
=
rot_mat_T
input_dict
[
'pcd_rotation_angle'
]
=
noise_rotation
return
# rotate points with bboxes
...
...
@@ -602,6 +618,7 @@ class GlobalRotScaleTrans(object):
noise_rotation
,
input_dict
[
'points'
])
input_dict
[
'points'
]
=
points
input_dict
[
'pcd_rotation'
]
=
rot_mat_T
input_dict
[
'pcd_rotation_angle'
]
=
noise_rotation
def
_scale_bbox_points
(
self
,
input_dict
):
"""Private function to scale bounding boxes and points.
...
...
@@ -610,7 +627,7 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after scaling, 'points'and keys in
\
dict: Results after scaling, 'points'and keys in
input_dict['bbox3d_fields'] are updated in the result dict.
"""
scale
=
input_dict
[
'pcd_scale_factor'
]
...
...
@@ -632,7 +649,7 @@ class GlobalRotScaleTrans(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after scaling, 'pcd_scale_factor' are updated
\
dict: Results after scaling, 'pcd_scale_factor' are updated
in the result dict.
"""
scale_factor
=
np
.
random
.
uniform
(
self
.
scale_ratio_range
[
0
],
...
...
@@ -640,7 +657,7 @@ class GlobalRotScaleTrans(object):
input_dict
[
'pcd_scale_factor'
]
=
scale_factor
def
__call__
(
self
,
input_dict
):
"""Private function to rotate, scale and translate bounding boxes and
\
"""Private function to rotate, scale and translate bounding boxes and
points.
Args:
...
...
@@ -648,7 +665,7 @@ class GlobalRotScaleTrans(object):
Returns:
dict: Results after scaling, 'points', 'pcd_rotation',
'pcd_scale_factor', 'pcd_trans' and keys in
\
'pcd_scale_factor', 'pcd_trans' and keys in
input_dict['bbox3d_fields'] are updated in the result dict.
"""
if
'transformation_3d_flow'
not
in
input_dict
:
...
...
@@ -686,7 +703,7 @@ class PointShuffle(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'points', 'pts_instance_mask'
\
dict: Results after filtering, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
idx
=
input_dict
[
'points'
].
shuffle
()
...
...
@@ -725,7 +742,7 @@ class ObjectRangeFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
\
dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
keys are updated in the result dict.
"""
# Check points instance type and initialise bev_range
...
...
@@ -777,7 +794,7 @@ class PointsRangeFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'points', 'pts_instance_mask'
\
dict: Results after filtering, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points
=
input_dict
[
'points'
]
...
...
@@ -823,7 +840,7 @@ class ObjectNameFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
\
dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
keys are updated in the result dict.
"""
gt_labels_3d
=
input_dict
[
'gt_labels_3d'
]
...
...
@@ -891,8 +908,8 @@ class PointSample(object):
if
sample_range
is
not
None
and
not
replace
:
# Only sampling the near points when len(points) >= num_samples
depth
=
np
.
linalg
.
norm
(
points
.
tensor
,
axis
=
1
)
far_inds
=
np
.
where
(
depth
>
sample_range
)[
0
]
near_inds
=
np
.
where
(
depth
<
=
sample_range
)[
0
]
far_inds
=
np
.
where
(
depth
>
=
sample_range
)[
0
]
near_inds
=
np
.
where
(
depth
<
sample_range
)[
0
]
# in case there are too many far points
if
len
(
far_inds
)
>
num_samples
:
far_inds
=
np
.
random
.
choice
(
...
...
@@ -915,7 +932,7 @@ class PointSample(object):
Args:
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after sampling, 'points', 'pts_instance_mask'
\
dict: Results after sampling, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points
=
results
[
'points'
]
...
...
@@ -996,10 +1013,10 @@ class IndoorPatchPointSample(object):
additional features. Defaults to False.
num_try (int, optional): Number of times to try if the patch selected
is invalid. Defaults to 10.
enlarge_size (float
| None
, optional): Enlarge the sampled patch to
enlarge_size (float, optional): Enlarge the sampled patch to
[-block_size / 2 - enlarge_size, block_size / 2 + enlarge_size] as
an augmentation. If None, set it as 0. Defaults to 0.2.
min_unique_num (int
| None
, optional): Minimum number of unique points
min_unique_num (int, optional): Minimum number of unique points
the sampled patch should contain. If None, use PointNet++'s method
to judge uniqueness. Defaults to None.
eps (float, optional): A value added to patch boundary to guarantee
...
...
@@ -1040,7 +1057,7 @@ class IndoorPatchPointSample(object):
attribute_dims
,
point_type
):
"""Generating model input.
Generate input by subtracting patch center and adding additional
\
Generate input by subtracting patch center and adding additional
features. Currently support colors and normalized xyz as features.
Args:
...
...
@@ -1184,7 +1201,7 @@ class IndoorPatchPointSample(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after sampling, 'points', 'pts_instance_mask'
\
dict: Results after sampling, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points
=
results
[
'points'
]
...
...
@@ -1244,7 +1261,7 @@ class BackgroundPointsFilter(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after filtering, 'points', 'pts_instance_mask'
\
dict: Results after filtering, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points
=
input_dict
[
'points'
]
...
...
@@ -1342,7 +1359,7 @@ class VoxelBasedPointSampler(object):
input_dict (dict): Result dict from loading pipeline.
Returns:
dict: Results after sampling, 'points', 'pts_instance_mask'
\
dict: Results after sampling, 'points', 'pts_instance_mask'
and 'pts_semantic_mask' keys are updated in the result dict.
"""
points
=
results
[
'points'
]
...
...
@@ -1423,3 +1440,258 @@ class VoxelBasedPointSampler(object):
repr_str
+=
' '
*
indent
+
'prev_voxel_generator=
\n
'
repr_str
+=
f
'
{
_auto_indent
(
repr
(
self
.
prev_voxel_generator
),
8
)
}
)'
return
repr_str
@
PIPELINES
.
register_module
()
class AffineResize(object):
    """Compute affine transform matrices while resizing to a target size.

    Different from :class:`RandomAffine` in MMDetection, this class
    calculates the affine transform matrices while resizing the input
    image to a fixed size. Two kinds of matrices are produced: 1) the
    matrix mapping the original image to the network input size and
    2) the matrix mapping the original image to the network output
    feature map size.

    Args:
        img_scale (tuple): Image scales for resizing.
        down_ratio (int): The down ratio of the feature map; expected
            to be >= 1.
        bbox_clip_border (bool, optional): Whether to clip objects that
            fall outside the border of the image. Defaults to True.
    """

    def __init__(self, img_scale, down_ratio, bbox_clip_border=True):
        self.img_scale = img_scale
        self.down_ratio = down_ratio
        self.bbox_clip_border = bbox_clip_border

    def __call__(self, results):
        """Apply the affine transform to the input image and its labels.

        Args:
            results (dict): Result dict from the loading pipeline.

        Returns:
            dict: Results after affine resize; 'affine_aug' and
                'trans_mat' keys are added to the result dict.
        """
        img = results['img']
        if 'center' in results:
            # The results went through RandomShiftScale before
            # AffineResize, which recorded center/size for us.
            center = results['center']
            size = results['size']
        else:
            # No RandomShiftScale beforehand: center on the full image
            # and mark that no affine augmentation was applied.
            rows, cols = img.shape[:2]
            center = np.array([cols / 2, rows / 2], dtype=np.float32)
            size = np.array([cols, rows], dtype=np.float32)
            results['affine_aug'] = False

        input_mat = self._get_transform_matrix(center, size, self.img_scale)

        img = cv2.warpAffine(img, input_mat[:2, :], self.img_scale)

        if isinstance(self.down_ratio, tuple):
            # One (3, 3) output matrix per feature-map down ratio.
            trans_mat = [
                self._get_transform_matrix(
                    center, size,
                    (self.img_scale[0] // ratio, self.img_scale[1] // ratio))
                for ratio in self.down_ratio
            ]
        else:
            out_scale = (self.img_scale[0] // self.down_ratio,
                         self.img_scale[1] // self.down_ratio)
            trans_mat = self._get_transform_matrix(center, size, out_scale)

        results['img'] = img
        results['img_shape'] = img.shape
        results['pad_shape'] = img.shape
        results['trans_mat'] = trans_mat

        self._affine_bboxes(results, input_mat)

        if 'centers2d' in results:
            centers2d = self._affine_transform(results['centers2d'],
                                               input_mat)
            # Keep only objects whose transformed center falls strictly
            # inside the network input image.
            valid_index = (centers2d[:, 0] > 0) & \
                (centers2d[:, 0] < self.img_scale[0]) & \
                (centers2d[:, 1] > 0) & \
                (centers2d[:, 1] < self.img_scale[1])
            results['centers2d'] = centers2d[valid_index]

            for key in results.get('bbox_fields', []):
                if key in ['gt_bboxes']:
                    results[key] = results[key][valid_index]
                    if 'gt_labels' in results:
                        results['gt_labels'] = \
                            results['gt_labels'][valid_index]
                    if 'gt_masks' in results:
                        raise NotImplementedError(
                            'AffineResize only supports bbox.')

            for key in results.get('bbox3d_fields', []):
                if key in ['gt_bboxes_3d']:
                    results[key].tensor = results[key].tensor[valid_index]
                    if 'gt_labels_3d' in results:
                        results['gt_labels_3d'] = \
                            results['gt_labels_3d'][valid_index]

            results['depths'] = results['depths'][valid_index]

        return results

    def _affine_bboxes(self, results, matrix):
        """Affine-transform 2D bboxes to the network input image.

        Args:
            results (dict): Result dict from the loading pipeline.
            matrix (np.ndarray): Matrix transforming the original image
                to the network input image size, of shape (3, 3).
        """
        for key in results.get('bbox_fields', []):
            bboxes = results[key]
            # Transform top-left and bottom-right corners separately.
            bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)
            bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)
            if self.bbox_clip_border:
                bboxes[:, [0, 2]] = \
                    bboxes[:, [0, 2]].clip(0, self.img_scale[0] - 1)
                bboxes[:, [1, 3]] = \
                    bboxes[:, [1, 3]].clip(0, self.img_scale[1] - 1)
            results[key] = bboxes

    def _affine_transform(self, points, matrix):
        """Apply a homogeneous affine transform to 2D points.

        Args:
            points (np.ndarray): Points to be transformed, shape (N, 2).
            matrix (np.ndarray): Affine transform matrix, shape (3, 3).

        Returns:
            np.ndarray: Transformed points, shape (N, 2).
        """
        ones = np.ones((points.shape[0], 1))
        homogeneous = np.hstack((points, ones))
        transformed = (matrix @ homogeneous.T).T
        return transformed[:, :2]

    def _get_transform_matrix(self, center, scale, output_scale):
        """Get the affine transform matrix.

        Args:
            center (tuple): Center of the current image.
            scale (tuple): Scale of the current image.
            output_scale (tuple[float]): The transform target image scales.

        Returns:
            np.ndarray: Affine transform matrix of shape (3, 3).
        """
        # TODO: further add rot and shift here.
        src_w = scale[0]
        dst_w, dst_h = output_scale[0], output_scale[1]

        src_dir = np.array([0, src_w * -0.5])
        dst_dir = np.array([0, dst_w * -0.5])

        src = np.zeros((3, 2), dtype=np.float32)
        dst = np.zeros((3, 2), dtype=np.float32)
        src[0, :] = center
        src[1, :] = center + src_dir
        dst[0, :] = np.array([dst_w * 0.5, dst_h * 0.5])
        dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
        # OpenCV needs three point pairs; derive the third from the
        # first two (see `_get_ref_point`).
        src[2, :] = self._get_ref_point(src[0, :], src[1, :])
        dst[2, :] = self._get_ref_point(dst[0, :], dst[1, :])

        affine = cv2.getAffineTransform(src, dst)
        matrix = np.concatenate((affine, [[0., 0., 1.]]))

        return matrix.astype(np.float32)

    def _get_ref_point(self, ref_point1, ref_point2):
        """Get a third reference point for the affine matrix.

        OpenCV needs at least three corresponding points on the original
        and target images to compute the affine matrix; this derives the
        third point from the given two by a 90-degree rotation.
        """
        d = ref_point1 - ref_point2
        return ref_point2 + np.array([-d[1], d[0]])

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(img_scale={self.img_scale}, '
        repr_str += f'down_ratio={self.down_ratio}) '
        return repr_str
@PIPELINES.register_module()
class RandomShiftScale(object):
    """Random shift scale.

    Different from the normal shift and scale function, it doesn't
    directly shift or scale image. It can record the shift and scale
    infos into loading pipelines. It's designed to be used with
    AffineResize together.

    Args:
        shift_scale (tuple[float]): Shift and scale range.
        aug_prob (float): The shifting and scaling probability.
    """

    def __init__(self, shift_scale, aug_prob):
        # (shift_range, scale_range): both are half-widths of the
        # sampling intervals used in __call__.
        self.shift_scale = shift_scale
        self.aug_prob = aug_prob

    def __call__(self, results):
        """Call function to record random shift and scale infos.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Results after random shift and scale, 'center', 'size'
                and 'affine_aug' keys are added in the result dict.
        """
        img_h, img_w = results['img'].shape[:2]

        # Start from the un-augmented geometry of the image.
        center = np.array([img_w / 2, img_h / 2], dtype=np.float32)
        size = np.array([img_w, img_h], dtype=np.float32)

        apply_aug = random.random() < self.aug_prob
        if apply_aug:
            shift, scale = self.shift_scale[0], self.shift_scale[1]
            # Candidate offsets in steps of 0.1 within [-shift, shift].
            shift_candidates = np.arange(-shift, shift + 0.1, 0.1)
            center[0] += size[0] * random.choice(shift_candidates)
            center[1] += size[1] * random.choice(shift_candidates)
            # Candidate scales in steps of 0.1 within [1-scale, 1+scale].
            scale_candidates = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
            size *= random.choice(scale_candidates)

        results['affine_aug'] = apply_aug
        results['center'] = center
        results['size'] = size

        return results

    def __repr__(self):
        """Return a printable representation of the transform."""
        return (f'{self.__class__.__name__}'
                f'(shift_scale={self.shift_scale}, '
                f'aug_prob={self.aug_prob}) ')
mmdet3d/datasets/s3dis_dataset.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
from
os
import
path
as
osp
import
numpy
as
np
from
mmdet3d.core
import
show_seg_result
from
mmdet3d.core.bbox
import
DepthInstance3DBoxes
from
mmdet.datasets
import
DATASETS
...
...
mmdet3d/datasets/scannet_dataset.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
import
tempfile
import
warnings
from
os
import
path
as
osp
import
numpy
as
np
from
mmdet3d.core
import
show_result
,
show_seg_result
from
mmdet3d.core.bbox
import
DepthInstance3DBoxes
from
mmdet.datasets
import
DATASETS
...
...
@@ -78,13 +79,13 @@ class ScanNetDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
\
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- file_name (str): Filename of point clouds.
- img_prefix (str
| None
, optional): Prefix of image files.
- img_prefix (str, optional): Prefix of image files.
- img_info (dict, optional): Image info.
- ann_info (dict): Annotation info.
"""
...
...
@@ -129,12 +130,12 @@ class ScanNetDataset(Custom3DDataset):
Returns:
dict: annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
\
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
- pts_semantic_mask_path (str): Path of semantic masks.
- axis_align_matrix (np.ndarray): Transformation matrix for
\
- axis_align_matrix (np.ndarray): Transformation matrix for
global scene alignment.
"""
# Use index to get the annos, thus the evalhook could also use this api
...
...
@@ -172,7 +173,7 @@ class ScanNetDataset(Custom3DDataset):
def
prepare_test_data
(
self
,
index
):
"""Prepare data for testing.
We should take axis_align_matrix from self.data_infos since we need
\
We should take axis_align_matrix from self.data_infos since we need
to align point clouds.
Args:
...
...
@@ -272,7 +273,7 @@ class ScanNetSegDataset(Custom3DSegDataset):
as input. Defaults to None.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g.
\
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES).
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
...
...
@@ -424,7 +425,7 @@ class ScanNetSegDataset(Custom3DSegDataset):
Args:
outputs (list[dict]): Testing results of the dataset.
txtfile_prefix (str
| None
): The prefix of saved files. It includes
txtfile_prefix (str): The prefix of saved files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
...
...
mmdet3d/datasets/sunrgbd_dataset.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
from
collections
import
OrderedDict
from
os
import
path
as
osp
import
numpy
as
np
from
mmdet3d.core
import
show_multi_modality_result
,
show_result
from
mmdet3d.core.bbox
import
DepthInstance3DBoxes
from
mmdet.core
import
eval_map
...
...
@@ -74,13 +75,13 @@ class SUNRGBDDataset(Custom3DDataset):
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
\
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str, optional): Filename of point clouds.
- file_name (str, optional): Filename of point clouds.
- img_prefix (str
| None
, optional): Prefix of image files.
- img_prefix (str, optional): Prefix of image files.
- img_info (dict, optional): Image info.
- calib (dict, optional): Camera calibration info.
- ann_info (dict): Annotation info.
...
...
@@ -125,7 +126,7 @@ class SUNRGBDDataset(Custom3DDataset):
Returns:
dict: annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
\
- gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- pts_instance_mask_path (str): Path of instance masks.
...
...
@@ -239,12 +240,15 @@ class SUNRGBDDataset(Custom3DDataset):
Args:
results (list[dict]): List of results.
metric (str | list[str]): Metrics to be evaluated.
iou_thr (list[float]): AP IoU thresholds.
iou_thr_2d (list[float]): AP IoU thresholds for 2d evaluation.
show (bool): Whether to visualize.
metric (str | list[str], optional): Metrics to be evaluated.
Default: None.
iou_thr (list[float], optional): AP IoU thresholds for 3D
evaluation. Default: (0.25, 0.5).
iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
evaluation. Default: (0.5, ).
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
out_dir (str
, optional
): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
...
...
mmdet3d/datasets/utils.py
View file @
32a4328b
...
...
@@ -12,7 +12,7 @@ from mmdet3d.datasets.pipelines import (Collect3D, DefaultFormatBundle3D,
PointSegClassMapping
)
# yapf: enable
from
mmdet.datasets.builder
import
PIPELINES
from
mmdet.datasets.pipelines
import
LoadImageFromFile
from
mmdet.datasets.pipelines
import
LoadImageFromFile
,
MultiScaleFlipAug
def
is_loading_function
(
transform
):
...
...
@@ -25,7 +25,7 @@ def is_loading_function(transform):
transform (dict | :obj:`Pipeline`): A transform config or a function.
Returns:
bool
| None
: Whether it is a loading function. None means can't judge.
bool: Whether it is a loading function. None means can't judge.
When transform is `MultiScaleFlipAug3D`, we return None.
"""
# TODO: use more elegant way to distinguish loading modules
...
...
@@ -40,12 +40,12 @@ def is_loading_function(transform):
return
False
if
obj_cls
in
loading_functions
:
return
True
if
obj_cls
in
(
MultiScaleFlipAug3D
,
):
if
obj_cls
in
(
MultiScaleFlipAug3D
,
MultiScaleFlipAug
):
return
None
elif
callable
(
transform
):
if
isinstance
(
transform
,
loading_functions
):
return
True
if
isinstance
(
transform
,
MultiScaleFlipAug3D
):
if
isinstance
(
transform
,
(
MultiScaleFlipAug3D
,
MultiScaleFlipAug
)
):
return
None
return
False
...
...
@@ -92,7 +92,7 @@ def get_loading_pipeline(pipeline):
... dict(type='Collect3D',
... keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
... ]
>>> assert expected_pipelines ==
\
>>> assert expected_pipelines ==
\
... get_loading_pipeline(pipelines)
"""
loading_pipeline
=
[]
...
...
@@ -126,7 +126,7 @@ def extract_result_dict(results, key):
key (str): Key of the desired data.
Returns:
np.ndarray | torch.Tensor
| None
: Data term.
np.ndarray | torch.Tensor: Data term.
"""
if
key
not
in
results
.
keys
():
return
None
...
...
mmdet3d/datasets/waymo_dataset.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
numpy
as
np
import
os
import
tempfile
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
import
torch
from
mmcv.utils
import
print_log
from
os
import
path
as
osp
from
mmdet.datasets
import
DATASETS
from
..core.bbox
import
Box3DMode
,
points_cam2img
...
...
@@ -46,8 +47,9 @@ class WaymoDataset(KittiDataset):
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list): The range of point cloud used to filter
invalid predicted boxes. Default: [-85, -85, -5, 85, 85, 5].
pcd_limit_range (list(float), optional): The range of point cloud used
to filter invalid predicted boxes.
Default: [-85, -85, -5, 85, 85, 5].
"""
CLASSES
=
(
'Car'
,
'Cyclist'
,
'Pedestrian'
)
...
...
@@ -100,7 +102,7 @@ class WaymoDataset(KittiDataset):
- sample_idx (str): sample index
- pts_filename (str): filename of point clouds
- img_prefix (str
| None
): prefix of image files
- img_prefix (str): prefix of image files
- img_info (dict): image info
- lidar2img (list[np.ndarray], optional): transformations from
lidar to different cameras
...
...
@@ -140,15 +142,15 @@ class WaymoDataset(KittiDataset):
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str
| None
): The prefix of pkl files. It includes
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str
| None
): The prefix of submitted files. It
submission_prefix (str): The prefix of submitted files. It
includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created.
Default: None.
data_format (str
| None
): Output data format.
Default: 'waymo'.
Another supported choice is 'kitti'.
data_format (str
, optional
): Output data format.
Default: 'waymo'.
Another supported choice is 'kitti'.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
...
...
@@ -226,18 +228,18 @@ class WaymoDataset(KittiDataset):
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
metric (str | list[str]
, optional
): Metrics to be evaluated.
Default: 'waymo'. Another supported metric is 'kitti'.
logger (logging.Logger | str
| None
): Logger used for printing
logger (logging.Logger | str
, optional
): Logger used for printing
related information during evaluation. Default: None.
pklfile_prefix (str
| None
): The prefix of pkl files
. It
includ
es
pklfile_prefix (str
, optional
): The prefix of pkl files includ
ing
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str
| None
): The prefix of submission data
s
.
submission_prefix (str
, optional
): The prefix of submission data.
If not specified, the submission data will not be generated.
show (bool): Whether to visualize.
show (bool
, optional
): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
out_dir (str
, optional
): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
...
...
@@ -349,8 +351,8 @@ class WaymoDataset(KittiDataset):
if
tmp_dir
is
not
None
:
tmp_dir
.
cleanup
()
if
show
:
self
.
show
(
results
,
out_dir
,
pipeline
=
pipeline
)
if
show
or
out_dir
:
self
.
show
(
results
,
out_dir
,
show
=
show
,
pipeline
=
pipeline
)
return
ap_dict
def
bbox2result_kitti
(
self
,
...
...
@@ -364,8 +366,8 @@ class WaymoDataset(KittiDataset):
net_outputs (List[np.ndarray]): list of array storing the
bbox and score
class_names (list[str]): A list of class names
pklfile_prefix (str
| None
): The prefix of pkl file.
submission_prefix (str
| None
): The prefix of submission file.
pklfile_prefix (str): The prefix of pkl file.
submission_prefix (str): The prefix of submission file.
Returns:
List[dict]: A list of dict have the kitti 3d format
...
...
@@ -494,7 +496,6 @@ class WaymoDataset(KittiDataset):
scores
=
box_dict
[
'scores_3d'
]
labels
=
box_dict
[
'labels_3d'
]
sample_idx
=
info
[
'image'
][
'image_idx'
]
# TODO: remove the hack of yaw
box_preds
.
limit_yaw
(
offset
=
0.5
,
period
=
np
.
pi
*
2
)
if
len
(
box_preds
)
==
0
:
...
...
mmdet3d/models/backbones/__init__.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
from
mmdet.models.backbones
import
SSDVGG
,
HRNet
,
ResNet
,
ResNetV1d
,
ResNeXt
from
.dgcnn
import
DGCNNBackbone
from
.dla
import
DLANet
from
.multi_backbone
import
MultiBackbone
from
.nostem_regnet
import
NoStemRegNet
from
.pointnet2_sa_msg
import
PointNet2SAMSG
...
...
@@ -8,5 +10,6 @@ from .second import SECOND
__all__
=
[
'ResNet'
,
'ResNetV1d'
,
'ResNeXt'
,
'SSDVGG'
,
'HRNet'
,
'NoStemRegNet'
,
'SECOND'
,
'PointNet2SASSG'
,
'PointNet2SAMSG'
,
'MultiBackbone'
'SECOND'
,
'DGCNNBackbone'
,
'PointNet2SASSG'
,
'PointNet2SAMSG'
,
'MultiBackbone'
,
'DLANet'
]
mmdet3d/models/backbones/base_pointnet.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
from
abc
import
ABCMeta
from
mmcv.runner
import
BaseModule
...
...
mmdet3d/models/backbones/dgcnn.py
0 → 100644
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
from
mmcv.runner
import
BaseModule
,
auto_fp16
from
torch
import
nn
as
nn
from
mmdet3d.ops
import
DGCNNFAModule
,
DGCNNGFModule
from
mmdet.models
import
BACKBONES
@BACKBONES.register_module()
class DGCNNBackbone(BaseModule):
    """Backbone network for DGCNN.

    Args:
        in_channels (int): Input channels of point cloud.
        num_samples (tuple[int], optional): The number of samples for knn or
            ball query in each graph feature (GF) module.
            Defaults to (20, 20, 20).
        knn_modes (tuple[str], optional): Mode of KNN of each knn module.
            Defaults to ('D-KNN', 'F-KNN', 'F-KNN').
        radius (tuple[float], optional): Sampling radii of each GF module.
            Defaults to (None, None, None).
        gf_channels (tuple[tuple[int]], optional): Out channels of each mlp in
            GF module. Defaults to ((64, 64), (64, 64), (64, )).
        fa_channels (tuple[int], optional): Out channels of each mlp in FA
            module. Defaults to (1024, ).
        act_cfg (dict, optional): Config of activation layer.
            Defaults to dict(type='ReLU').
        init_cfg (dict, optional): Initialization config.
            Defaults to None.
    """

    def __init__(self,
                 in_channels,
                 num_samples=(20, 20, 20),
                 knn_modes=('D-KNN', 'F-KNN', 'F-KNN'),
                 radius=(None, None, None),
                 gf_channels=((64, 64), (64, 64), (64, )),
                 fa_channels=(1024, ),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.num_gf = len(gf_channels)
        assert len(num_samples) == len(knn_modes) == len(radius) == len(
            gf_channels), 'Num_samples, knn_modes, radius and gf_channels \
            should have the same length.'

        self.GF_modules = nn.ModuleList()
        # A GF module consumes edge features, i.e. twice the point-feature
        # width produced by the previous stage.
        cur_in_channel = in_channels * 2
        skip_channel_list = [cur_in_channel]  # input channel list

        for idx in range(self.num_gf):
            mlp_spec = [cur_in_channel] + list(gf_channels[idx])
            self.GF_modules.append(
                DGCNNGFModule(
                    mlp_channels=mlp_spec,
                    num_sample=num_samples[idx],
                    knn_mode=knn_modes[idx],
                    radius=radius[idx],
                    act_cfg=act_cfg))
            skip_channel_list.append(mlp_spec[-1])
            cur_in_channel = mlp_spec[-1] * 2

        # The FA module aggregates the concatenated outputs of all GF
        # stages, hence its input width is the sum of their widths.
        fa_spec = [sum(skip_channel_list[1:])] + list(fa_channels)
        self.FA_module = DGCNNFAModule(mlp_channels=fa_spec, act_cfg=act_cfg)

    @auto_fp16(apply_to=('points', ))
    def forward(self, points):
        """Forward pass.

        Args:
            points (torch.Tensor): point coordinates with features,
                with shape (B, N, in_channels).

        Returns:
            dict[str, list[torch.Tensor]]: Outputs after graph feature (GF)
                and feature aggregation (FA) modules.

                - gf_points (list[torch.Tensor]): Outputs after each GF
                  module.
                - fa_points (torch.Tensor): Outputs after FA module.
        """
        gf_points = [points]
        # Each GF stage consumes the previous stage's output.
        for gf_module in self.GF_modules:
            gf_points.append(gf_module(gf_points[-1]))
        fa_points = self.FA_module(gf_points)
        return dict(gf_points=gf_points, fa_points=fa_points)
mmdet3d/models/backbones/dla.py
0 → 100644
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
import
torch
from
mmcv.cnn
import
build_conv_layer
,
build_norm_layer
from
mmcv.runner
import
BaseModule
from
torch
import
nn
from
mmdet.models.builder
import
BACKBONES
def dla_build_norm_layer(cfg, num_features):
    """Build normalization layer specially designed for DLANet.

    Behaves like ``build_norm_layer`` except that for GroupNorm with a
    channel count not divisible by 32 the configured number of groups is
    halved before construction.

    Args:
        cfg (dict): The norm layer config, which should contain:

            - type (str): Layer type.
            - layer args: Args needed to instantiate a norm layer.
            - requires_grad (bool, optional): Whether stop gradient updates.

        num_features (int): Number of input channels.

    Returns:
        Function: Build normalization layer in mmcv.
    """
    cfg_ = cfg.copy()
    # GroupNorm needs the channel count to be divisible by the group
    # count; fall back to half the groups when 32 does not divide it.
    if cfg_['type'] == 'GN' and num_features % 32 != 0:
        assert 'num_groups' in cfg_
        cfg_['num_groups'] = cfg_['num_groups'] // 2
    return build_norm_layer(cfg_, num_features)
class BasicBlock(BaseModule):
    """BasicBlock in DLANet.

    Two 3x3 conv-norm stages with a residual connection.

    Args:
        in_channels (int): Input feature channel.
        out_channels (int): Output feature channel.
        norm_cfg (dict): Dictionary to construct and config norm layer.
        conv_cfg (dict): Dictionary to construct and config conv layer.
        stride (int, optional): Conv stride. Default: 1.
        dilation (int, optional): Conv dilation. Default: 1.
        init_cfg (dict, optional): Initialization config. Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 norm_cfg,
                 conv_cfg,
                 stride=1,
                 dilation=1,
                 init_cfg=None):
        super(BasicBlock, self).__init__(init_cfg)
        # First 3x3 conv may downsample spatially via `stride`.
        self.conv1 = build_conv_layer(
            conv_cfg,
            in_channels,
            out_channels,
            3,
            stride=stride,
            padding=dilation,
            dilation=dilation,
            bias=False)
        self.norm1 = dla_build_norm_layer(norm_cfg, out_channels)[1]
        self.relu = nn.ReLU(inplace=True)
        # Second 3x3 conv always keeps the spatial resolution.
        self.conv2 = build_conv_layer(
            conv_cfg,
            out_channels,
            out_channels,
            3,
            stride=1,
            padding=dilation,
            dilation=dilation,
            bias=False)
        self.norm2 = dla_build_norm_layer(norm_cfg, out_channels)[1]
        self.stride = stride

    def forward(self, x, identity=None):
        """Run two conv-norm-relu stages and add the residual branch.

        When ``identity`` is not provided, the input itself is used as
        the residual (standard residual block behaviour).
        """
        if identity is None:
            identity = x
        out = self.relu(self.norm1(self.conv1(x)))
        out = self.norm2(self.conv2(out))
        out += identity
        return self.relu(out)
class Root(BaseModule):
    """Root in DLANet.

    Fuses the feature maps of several children with a single conv layer,
    optionally adding the first child back as an identity connection.

    Args:
        in_channels (int): Input feature channel.
        out_channels (int): Output feature channel.
        norm_cfg (dict): Dictionary to construct and config norm layer.
        conv_cfg (dict): Dictionary to construct and config conv layer.
        kernel_size (int): Size of convolution kernel.
        add_identity (bool): Whether to add identity in root.
        init_cfg (dict, optional): Initialization config. Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 norm_cfg,
                 conv_cfg,
                 kernel_size,
                 add_identity,
                 init_cfg=None):
        super(Root, self).__init__(init_cfg)
        # Fuse the channel-concatenated children down to `out_channels`.
        self.conv = build_conv_layer(
            conv_cfg,
            in_channels,
            out_channels,
            1,
            stride=1,
            padding=(kernel_size - 1) // 2,
            bias=False)
        self.norm = dla_build_norm_layer(norm_cfg, out_channels)[1]
        self.relu = nn.ReLU(inplace=True)
        self.add_identity = add_identity

    def forward(self, feat_list):
        """Forward function.

        Args:
            feat_list (list[torch.Tensor]): Output features from
                multiple layers.
        """
        fused = self.conv(torch.cat(feat_list, 1))
        fused = self.norm(fused)
        if self.add_identity:
            # Skip connection from the first child feature.
            fused += feat_list[0]
        return self.relu(fused)
class Tree(BaseModule):
    """Tree in DLANet.

    A recursive aggregation node: at ``levels == 1`` it holds two blocks
    and a :class:`Root` fusing them; at deeper levels each subtree is
    itself a :class:`Tree`, and only the second subtree carries the root.

    Args:
        levels (int): The level of the tree.
        block (nn.Module): The block module in tree.
        in_channels: Input feature channel.
        out_channels: Output feature channel.
        norm_cfg (dict): Dictionary to construct and config
            norm layer.
        conv_cfg (dict): Dictionary to construct and config
            conv layer.
        stride (int, optional): Convolution stride.
            Default: 1.
        level_root (bool, optional): whether belongs to the
            root layer.
        root_dim (int, optional): Root input feature channel.
        root_kernel_size (int, optional): Size of root
            convolution kernel. Default: 1.
        dilation (int, optional): Conv dilation. Default: 1.
        add_identity (bool, optional): Whether to add
            identity in root. Default: False.
        init_cfg (dict, optional): Initialization config.
            Default: None.
    """

    def __init__(self,
                 levels,
                 block,
                 in_channels,
                 out_channels,
                 norm_cfg,
                 conv_cfg,
                 stride=1,
                 level_root=False,
                 root_dim=None,
                 root_kernel_size=1,
                 dilation=1,
                 add_identity=False,
                 init_cfg=None):
        super(Tree, self).__init__(init_cfg)
        # Default root input: the two child outputs concatenated; a
        # level root additionally receives the downsampled tree input.
        if root_dim is None:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels
        if levels == 1:
            # Leaf tree: two plain blocks fused by a Root.
            self.root = Root(root_dim, out_channels, norm_cfg, conv_cfg,
                             root_kernel_size, add_identity)
            self.tree1 = block(
                in_channels,
                out_channels,
                norm_cfg,
                conv_cfg,
                stride,
                dilation=dilation)
            self.tree2 = block(
                out_channels,
                out_channels,
                norm_cfg,
                conv_cfg,
                1,
                dilation=dilation)
        else:
            # Recursive case: the fusing Root lives in tree2, whose
            # root_dim is widened by tree1's output channels.
            self.tree1 = Tree(
                levels - 1,
                block,
                in_channels,
                out_channels,
                norm_cfg,
                conv_cfg,
                stride,
                root_dim=None,
                root_kernel_size=root_kernel_size,
                dilation=dilation,
                add_identity=add_identity)
            self.tree2 = Tree(
                levels - 1,
                block,
                out_channels,
                out_channels,
                norm_cfg,
                conv_cfg,
                root_dim=root_dim + out_channels,
                root_kernel_size=root_kernel_size,
                dilation=dilation,
                add_identity=add_identity)
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = None
        self.project = None
        self.levels = levels
        # Match the identity branch to the main branch: pool to the
        # strided resolution, then 1x1-project to `out_channels`.
        if stride > 1:
            self.downsample = nn.MaxPool2d(stride, stride=stride)
        if in_channels != out_channels:
            self.project = nn.Sequential(
                build_conv_layer(
                    conv_cfg,
                    in_channels,
                    out_channels,
                    1,
                    stride=1,
                    bias=False),
                dla_build_norm_layer(norm_cfg, out_channels)[1])

    def forward(self, x, identity=None, children=None):
        """Forward function.

        Args:
            x (torch.Tensor): Input feature map.
            identity (torch.Tensor, optional): Residual branch fed to
                ``tree1``; derived from ``x`` when None.
            children (list[torch.Tensor], optional): Extra features to
                be fused by the root. Default: None.
        """
        children = [] if children is None else children
        bottom = self.downsample(x) if self.downsample else x
        identity = self.project(bottom) if self.project else bottom
        if self.level_root:
            # A level root also fuses the (downsampled) tree input.
            children.append(bottom)
        x1 = self.tree1(x, identity)
        if self.levels == 1:
            # Leaf: fuse both block outputs plus accumulated children.
            x2 = self.tree2(x1)
            feat_list = [x2, x1] + children
            x = self.root(feat_list)
        else:
            # Recursive: defer fusion to tree2's own root.
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x
@BACKBONES.register_module()
class DLANet(BaseModule):
    r"""`DLA backbone <https://arxiv.org/abs/1707.06484>`_.

    Args:
        depth (int): Depth of DLA. Default: 34.
        in_channels (int, optional): Number of input image channels.
            Default: 3.
        out_indices (tuple[int], optional): Indices of the levels whose
            outputs are returned by :meth:`forward`.
            Default: (0, 1, 2, 3, 4, 5).
        frozen_stages (int, optional): Stages to be frozen (stop grad and
            set to eval mode). -1 means not freezing any stage. Default: -1.
        norm_cfg (dict, optional): Dictionary to construct and config
            norm layer. Default: None.
        conv_cfg (dict, optional): Dictionary to construct and config
            conv layer. Default: None.
        layer_with_level_root (list[bool], optional): Whether to apply
            level_root in each DLA layer, this is only used for
            tree levels. Default: (False, True, True, True).
        with_identity_root (bool, optional): Whether to add identity
            in root layer. Default: False.
        pretrained (str, optional): model pretrained path.
            Default: None.
        init_cfg (dict or list[dict], optional): Initialization
            config dict. Default: None
    """

    # depth -> (block type, convs per level, channels per level)
    arch_settings = {
        34: (BasicBlock, (1, 1, 1, 2, 2, 1), (16, 32, 64, 128, 256, 512)),
    }

    def __init__(self,
                 depth,
                 in_channels=3,
                 out_indices=(0, 1, 2, 3, 4, 5),
                 frozen_stages=-1,
                 norm_cfg=None,
                 conv_cfg=None,
                 layer_with_level_root=(False, True, True, True),
                 with_identity_root=False,
                 pretrained=None,
                 init_cfg=None):
        super(DLANet, self).__init__(init_cfg)
        if depth not in self.arch_settings:
            # Fixed typo in the error message ('invalida' -> 'invalid').
            raise KeyError(f'invalid depth {depth} for DLA')

        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be set at the same time'
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
        elif pretrained is None:
            if init_cfg is None:
                # Default init: Kaiming for convs, constant 1 for norms.
                self.init_cfg = [
                    dict(type='Kaiming', layer='Conv2d'),
                    dict(
                        type='Constant',
                        val=1,
                        layer=['_BatchNorm', 'GroupNorm'])
                ]

        block, levels, channels = self.arch_settings[depth]
        self.channels = channels
        self.num_levels = len(levels)
        self.frozen_stages = frozen_stages
        self.out_indices = out_indices
        assert max(out_indices) < self.num_levels
        # Stem: 7x7 conv keeping the input resolution.
        self.base_layer = nn.Sequential(
            build_conv_layer(
                conv_cfg,
                in_channels,
                channels[0],
                7,
                stride=1,
                padding=3,
                bias=False),
            dla_build_norm_layer(norm_cfg, channels[0])[1],
            nn.ReLU(inplace=True))

        # DLANet first uses two conv levels then uses several Tree levels.
        for i in range(2):
            level_layer = self._make_conv_level(
                channels[0],
                channels[i],
                levels[i],
                norm_cfg,
                conv_cfg,
                stride=i + 1)
            layer_name = f'level{i}'
            self.add_module(layer_name, level_layer)

        for i in range(2, self.num_levels):
            dla_layer = Tree(
                levels[i],
                block,
                channels[i - 1],
                channels[i],
                norm_cfg,
                conv_cfg,
                2,
                level_root=layer_with_level_root[i - 2],
                add_identity=with_identity_root)
            layer_name = f'level{i}'
            self.add_module(layer_name, dla_layer)

        self._freeze_stages()

    def _make_conv_level(self, in_channels, out_channels, num_convs, norm_cfg,
                         conv_cfg, stride=1, dilation=1):
        """Build a stack of conv-norm-relu modules.

        Args:
            in_channels (int): Input feature channel.
            out_channels (int): Output feature channel.
            num_convs (int): Number of Conv module.
            norm_cfg (dict): Dictionary to construct and config
                norm layer.
            conv_cfg (dict): Dictionary to construct and config
                conv layer.
            stride (int, optional): Conv stride. Default: 1.
            dilation (int, optional): Conv dilation. Default: 1.
        """
        modules = []
        for i in range(num_convs):
            modules.extend([
                build_conv_layer(
                    conv_cfg,
                    in_channels,
                    out_channels,
                    3,
                    # Only the first conv of the stack may downsample.
                    stride=stride if i == 0 else 1,
                    padding=dilation,
                    bias=False,
                    dilation=dilation),
                dla_build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True)
            ])
            in_channels = out_channels
        return nn.Sequential(*modules)

    def _freeze_stages(self):
        """Freeze stages up to ``self.frozen_stages`` (eval mode, no grad)."""
        if self.frozen_stages >= 0:
            self.base_layer.eval()
            for param in self.base_layer.parameters():
                param.requires_grad = False

            # The two stem conv levels are frozen together with the base
            # layer.
            for i in range(2):
                m = getattr(self, f'level{i}')
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

            # Freeze the requested number of Tree levels (level2 onwards).
            for i in range(1, self.frozen_stages + 1):
                m = getattr(self, f'level{i + 1}')
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def forward(self, x):
        """Forward pass, returning the outputs of levels in
        ``self.out_indices`` as a tuple."""
        outs = []
        x = self.base_layer(x)
        for i in range(self.num_levels):
            x = getattr(self, 'level{}'.format(i))(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)
mmdet3d/models/backbones/multi_backbone.py
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
torch
import
warnings
import
torch
from
mmcv.cnn
import
ConvModule
from
mmcv.runner
import
BaseModule
,
auto_fp16
from
torch
import
nn
as
nn
...
...
Prev
1
…
6
7
8
9
10
11
12
13
14
…
21
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment