OpenDAS / mmdetection3d

Commit 5a1575a0, authored Jun 19, 2020 by zhangwenwei

Merge branch 'add-ori-mvx' into 'master'

Add ori mvx

See merge request open-mmlab/mmdet.3d!79

Parents: 0ed9c576, 3298db8a

Showing 20 changed files with 139 additions and 896 deletions (+139 -896)
.gitlab-ci.yml                                                              +1   -1
configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py    +22  -32
configs/mvxnet/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py                     +0   -206
configs/mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py                        +0   -141
docs/api.rst                                                                +0   -5
mmdet3d/apis/__init__.py                                                    +0   -3
mmdet3d/apis/train.py                                                       +0   -122
mmdet3d/datasets/kitti_dataset.py                                           +51  -13
mmdet3d/datasets/pipelines/__init__.py                                      +3   -3
mmdet3d/datasets/pipelines/dbsampler.py                                     +0   -254
mmdet3d/models/detectors/base.py                                            +7   -56
mmdet3d/models/detectors/mvx_two_stage.py                                   +6   -6
mmdet3d/models/detectors/parta2.py                                          +7   -40
mmdet3d/models/detectors/single_stage.py                                    +15  -0
mmdet3d/models/detectors/two_stage.py                                       +15  -0
mmdet3d/models/fusion_layers/point_fusion.py                                +1   -1
requirements/runtime.txt                                                    +1   -1
tests/test_config.py                                                        +0   -2
tests/test_forward.py                                                       +9   -8
tools/train.py                                                              +1   -2
.gitlab-ci.yml

@@ -27,7 +27,7 @@ linting:
   stage: test
   script:
     - echo "Start building..."
-    - pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
+    - pip install "git+https://github.com/open-mmlab/cocoapi.git#subdirectory=pycocotools"
    - pip install git+https://github.com/open-mmlab/mmcv.git
    - pip install git+https://github.com/open-mmlab/mmdetection.git
    - python -c "import mmdet; print(mmdet.__version__)"
configs/mvxnet/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
→ configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py  (renamed)

@@ -4,8 +4,6 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 model = dict(
     type='DynamicMVXFasterRCNN',
-    pretrained=('./pretrain_detectron/'
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
     img_backbone=dict(
         type='ResNet',
         depth=50,
@@ -136,22 +134,10 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
 img_norm_cfg = dict(
     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 input_modality = dict(use_lidar=True, use_camera=True)
-db_sampler = dict(
-    type='MMDataBaseSampler',
-    data_root=data_root,
-    info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    blending_type=['box', 'gaussian', 'poisson'],
-    depth_consistent=True,
-    check_2D_collision=True,
-    collision_thr=[0, 0.3, 0.5, 0.7],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
-    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
-    classes=class_names)
 train_pipeline = [
+    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
         type='Resize',
         img_scale=[(640, 192), (2560, 768)],
@@ -171,10 +157,11 @@ train_pipeline = [
     dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
         type='Collect3D',
-        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
+        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(type='LoadImageFromFile'),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1280, 384),
@@ -196,7 +183,7 @@ test_pipeline = [
             type='DefaultFormatBundle3D',
             class_names=class_names,
             with_label=False),
-        dict(type='Collect3D', keys=['points'])
+        dict(type='Collect3D', keys=['points', 'img'])
     ])
 ]
@@ -204,15 +191,18 @@ data = dict(
     samples_per_gpu=2,
     workers_per_gpu=2,
     train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=train_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=False),
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            ann_file=data_root + 'kitti_infos_train.pkl',
+            split='training',
+            pts_prefix='velodyne_reduced',
+            pipeline=train_pipeline,
+            modality=input_modality,
+            classes=class_names,
+            test_mode=False)),
     val=dict(
         type=dataset_type,
         data_root=data_root,
@@ -255,10 +245,10 @@ log_config = dict(
 # yapf:enable
 evaluation = dict(interval=1)
 # runtime settings
-total_epochs = 80
+total_epochs = 40
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_1x_coco-3-class_44.7_20200205-b1c1533f.pth'  # noqa
+work_dir = None
+load_from = './pretrain_mmdet/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa
 resume_from = None
 workflow = [('train', 1)]
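The train entry above now wraps the KITTI dataset in a RepeatDataset with times=2, which is why total_epochs can drop from 80 to 40: each scheduled epoch walks the data twice, so the total number of training iterations stays the same. A minimal standalone sketch of the wrapper's index arithmetic (illustrative only, not the mmdet implementation):

class RepeatDataset:
    """Sketch of a repeat wrapper: one 'epoch' iterates the wrapped
    dataset `times` times."""

    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times

    def __len__(self):
        return self.times * len(self.dataset)

    def __getitem__(self, idx):
        # indices past the original length wrap around to the start
        return self.dataset[idx % len(self.dataset)]


# e.g. a 3712-sample KITTI train split repeated twice appears as 7424 samples
repeated = RepeatDataset(list(range(3712)), times=2)
assert len(repeated) == 7424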
configs/mvxnet/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py  (deleted, 100644 → 0)

# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='FasterRCNN',
    pretrained='open-mmlab://regnetx_3.2gf',
    backbone=dict(
        type='RegNet',
        arch='regnetx_3.2gf',
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        base_channels=32,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[96, 192, 432, 1008],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=10,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        # following the setting of detectron,
        # which improves ~0.2 bbox mAP.
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
           'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
    # The mean and std is used in PyCls when training RegNets
    mean=[103.53, 116.28, 123.675],
    std=[57.375, 57.12, 58.395],
    to_rgb=False)
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/': 's3://nuscenes/nuscenes/',
        'data/nuscenes/': 's3://nuscenes/nuscenes/'
    }))
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=False,
        file_client_args=file_client_args),
    dict(
        type='Resize',
        img_scale=(1280, 720),
        ratio_range=(0.75, 1.25),
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1280, 720),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth'  # noqa
resume_from = None
workflow = [('train', 1)]
configs/mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py  (deleted, 100644 → 0)

# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='RetinaNet',
    pretrained=('open-mmlab://resnet50_caffe_bgr'),
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,
        num_outs=5),
    bbox_head=dict(
        type='RetinaHead',
        num_classes=10,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),
    allowed_border=-1,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_thr=0.5),
    max_per_img=100)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel value are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=(1600, 900),
        ratio_range=(0.8, 1.2),
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1600, 900),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/retinanet_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
docs/api.rst

 API Documentation
 =================

-mmdet3d.apis
---------------
-.. automodule:: mmdet3d.apis
-    :members:
-
 mmdet3d.core
 --------------
mmdet3d/apis/__init__.py  (deleted, 100644 → 0)

from .train import batch_processor, train_detector

__all__ = ['batch_processor', 'train_detector']
mmdet3d/apis/train.py  (deleted, 100644 → 0)

import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner, build_optimizer

from mmdet3d.utils import get_root_logger
from mmdet.apis.train import parse_losses
from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
                        Fp16OptimizerHook)
from mmdet.datasets import build_dataloader, build_dataset


def batch_processor(model, data, train_mode):
    """Process a data batch.

    This method is required as an argument of Runner, which defines how to
    process a data batch and obtain proper outputs. The first 3 arguments of
    batch_processor are fixed.

    Args:
        model (nn.Module): A PyTorch model.
        data (dict): The data batch in a dict.
        train_mode (bool): Training mode or not. It may be useless for some
            models.

    Returns:
        dict: A dict containing losses and log vars.
    """
    losses = model(**data)
    loss, log_vars = parse_losses(losses)

    if 'img_metas' in data:
        num_samples = len(data['img_metas'].data)
    else:
        num_samples = len(data['img'].data)
    outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)

    return outputs


def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model,
        batch_processor,
        optimizer,
        cfg.work_dir,
        logger=logger,
        meta=meta)
    # an ugly walkaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
mmdet3d/datasets/kitti_dataset.py

@@ -52,7 +52,7 @@ class KittiDataset(Custom3DDataset):
     def get_data_info(self, index):
         info = self.data_infos[index]
         sample_idx = info['image']['image_idx']
-        img_filename = os.path.join(self.root_split,
+        img_filename = os.path.join(self.data_root,
                                     info['image']['image_path'])

         # TODO: consider use torch.Tensor only
@@ -65,7 +65,8 @@ class KittiDataset(Custom3DDataset):
         input_dict = dict(
             sample_idx=sample_idx,
             pts_filename=pts_filename,
-            img_filename=img_filename,
+            img_prefix=None,
+            img_info=dict(filename=img_filename),
             lidar2img=lidar2img)

         if not self.test_mode:
@@ -113,8 +114,8 @@ class KittiDataset(Custom3DDataset):
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
             gt_labels_3d=gt_labels_3d,
-            gt_bboxes=gt_bboxes,
-            gt_labels=gt_labels)
+            bboxes=gt_bboxes,
+            labels=gt_labels)
         return anns_results

     def drop_arrays_by_name(self, gt_names, used_classes):
@@ -151,6 +152,24 @@ class KittiDataset(Custom3DDataset):
             result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
                                                     pklfile_prefix,
                                                     submission_prefix)
+        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:
+            result_files = dict()
+            for name in outputs[0]:
+                results_ = [out[name] for out in outputs]
+                pklfile_prefix_ = pklfile_prefix + name
+                if submission_prefix is not None:
+                    submission_prefix_ = submission_prefix + name
+                else:
+                    submission_prefix_ = None
+                if 'img' in name:
+                    result_files = self.bbox2result_kitti2d(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                else:
+                    result_files_ = self.bbox2result_kitti(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                result_files[name] = result_files_
         else:
             result_files = self.bbox2result_kitti(outputs, self.CLASSES,
                                                   pklfile_prefix,
@@ -162,8 +181,7 @@ class KittiDataset(Custom3DDataset):
                  metric=None,
                  logger=None,
                  pklfile_prefix=None,
-                 submission_prefix=None,
-                 result_names=['pts_bbox']):
+                 submission_prefix=None):
         """Evaluation in KITTI protocol.

         Args:
@@ -178,18 +196,38 @@ class KittiDataset(Custom3DDataset):
                 If not specified, the submission data will not be generated.

         Returns:
-            dict[str: float]
+            dict[str: float]: results of each evaluation metric
         """
         result_files, tmp_dir = self.format_results(results, pklfile_prefix)
         from mmdet3d.core.evaluation import kitti_eval
         gt_annos = [info['annos'] for info in self.data_infos]
-        if metric == 'img_bbox':
-            ap_result_str, ap_dict = kitti_eval(
-                gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
-        else:
-            ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
-                                                self.CLASSES)
-        print_log('\n' + ap_result_str, logger=logger)
+        if isinstance(result_files, dict):
+            ap_dict = dict()
+            for name, result_files_ in result_files.items():
+                eval_types = ['bbox', 'bev', '3d']
+                if 'img' in name:
+                    eval_types = ['bbox']
+                ap_result_str, ap_dict_ = kitti_eval(
+                    gt_annos, result_files_, self.CLASSES,
+                    eval_types=eval_types)
+                for ap_type, ap in ap_dict_.items():
+                    ap_dict[f'{name}/{ap_type}'] = float(
+                        '{:.4f}'.format(ap))
+                print_log(
+                    f'Results of {name}:\n' + ap_result_str, logger=logger)
+        else:
+            if metric == 'img_bbox':
+                ap_result_str, ap_dict = kitti_eval(
+                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
+            else:
+                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
+                                                    self.CLASSES)
+            print_log('\n' + ap_result_str, logger=logger)

         if tmp_dir is not None:
             tmp_dir.cleanup()
         return ap_dict
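In the new evaluate branch above, multi-modality results arrive as a dict of per-head result files ('pts_bbox', 'img_bbox'), and the per-head AP dicts are flattened into a single dict with the head name as a key prefix. A standalone illustration of that flattening loop (the metric names and values are made up):

ap_dicts = {
    'pts_bbox': {'KITTI/Car_3D_moderate': 79.31},   # hypothetical metrics
    'img_bbox': {'KITTI/Car_bbox_moderate': 88.02},
}
ap_dict = dict()
for name, ap_dict_ in ap_dicts.items():
    for ap_type, ap in ap_dict_.items():
        ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
assert 'pts_bbox/KITTI/Car_3D_moderate' in ap_dict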
mmdet3d/datasets/pipelines/__init__.py

 from mmdet.datasets.pipelines import Compose
-from .dbsampler import DataBaseSampler, MMDataBaseSampler
+from .dbsampler import DataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScaleTrans,
                              IndoorPointsColorJitter)
@@ -19,6 +19,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScaleTrans', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
-    'IndoorPointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D'
+    'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
+    'PointSegClassMapping', 'MultiScaleFlipAug3D'
 ]
mmdet3d/datasets/pipelines/dbsampler.py

@@ -2,8 +2,6 @@ import copy
 import os
 import pickle

-import cv2
-import mmcv
 import numpy as np

 from mmdet3d.core.bbox import box_np_ops
@@ -263,255 +261,3 @@ class DataBaseSampler(object):
                     boxes[i, -1] - sp_boxes[i - num_gt, -1])
                 valid_samples.append(sampled[i - num_gt])
         return valid_samples

(the entire MMDataBaseSampler class below is removed by this commit)

@OBJECTSAMPLERS.register_module()
class MMDataBaseSampler(DataBaseSampler):

    def __init__(self,
                 info_path,
                 data_root,
                 rate,
                 prepare,
                 object_rot_range,
                 sample_groups,
                 classes=None,
                 check_2D_collision=False,
                 collision_thr=0,
                 collision_in_classes=False,
                 depth_consistent=False,
                 blending_type=None):
        super(MMDataBaseSampler, self).__init__(
            info_path=info_path,
            data_root=data_root,
            rate=rate,
            prepare=prepare,
            object_rot_range=object_rot_range,
            sample_groups=sample_groups,
            classes=classes)
        self.blending_type = blending_type
        self.depth_consistent = depth_consistent
        self.check_2D_collision = check_2D_collision
        self.collision_thr = collision_thr
        self.collision_in_classes = collision_in_classes

    def sample_all(self, gt_bboxes_3d, gt_names, gt_bboxes_2d=None, img=None):
        sampled_num_dict = {}
        sample_num_per_class = []
        for class_name, max_sample_num in zip(self.sample_classes,
                                              self.sample_max_nums):
            sampled_num = int(max_sample_num -
                              np.sum([n == class_name for n in gt_names]))
            sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
            sampled_num_dict[class_name] = sampled_num
            sample_num_per_class.append(sampled_num)

        sampled = []
        sampled_gt_bboxes_3d = []
        sampled_gt_bboxes_2d = []
        avoid_coll_boxes_3d = gt_bboxes_3d
        avoid_coll_boxes_2d = gt_bboxes_2d

        for class_name, sampled_num in zip(self.sample_classes,
                                           sample_num_per_class):
            if sampled_num > 0:
                sampled_cls = self.sample_class_v2(class_name, sampled_num,
                                                   avoid_coll_boxes_3d,
                                                   avoid_coll_boxes_2d)

                sampled += sampled_cls
                if len(sampled_cls) > 0:
                    if len(sampled_cls) == 1:
                        sampled_gt_box_3d = sampled_cls[0]['box3d_lidar'][
                            np.newaxis, ...]
                        sampled_gt_box_2d = sampled_cls[0]['box2d_camera'][
                            np.newaxis, ...]
                    else:
                        sampled_gt_box_3d = np.stack(
                            [s['box3d_lidar'] for s in sampled_cls], axis=0)
                        sampled_gt_box_2d = np.stack(
                            [s['box2d_camera'] for s in sampled_cls], axis=0)

                    sampled_gt_bboxes_3d += [sampled_gt_box_3d]
                    sampled_gt_bboxes_2d += [sampled_gt_box_2d]
                    if self.collision_in_classes:
                        # TODO: check whether check collision check among
                        # classes is necessary
                        avoid_coll_boxes_3d = np.concatenate(
                            [avoid_coll_boxes_3d, sampled_gt_box_3d], axis=0)
                        avoid_coll_boxes_2d = np.concatenate(
                            [avoid_coll_boxes_2d, sampled_gt_box_2d], axis=0)

        ret = None
        if len(sampled) > 0:
            sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
            sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)

            s_points_list = []
            count = 0

            if self.depth_consistent:
                # change the paster order based on distance
                center = sampled_gt_bboxes_3d[:, 0:3]
                paste_order = np.argsort(
                    -np.power(np.sum(np.power(center, 2), axis=-1), 1 / 2),
                    axis=-1)

            for idx in range(len(sampled)):
                if self.depth_consistent:
                    inds = np.where(paste_order == idx)[0][0]
                    info = sampled[inds]
                else:
                    info = sampled[idx]

                pcd_file_path = os.path.join(
                    self.data_root,
                    info['path']) if self.data_root else info['path']
                img_file_path = pcd_file_path + '.png'
                mask_file_path = pcd_file_path + '.mask.png'

                s_points = np.fromfile(
                    pcd_file_path, dtype=np.float32).reshape([-1, 4])
                s_patch = mmcv.imread(img_file_path)
                s_mask = mmcv.imread(mask_file_path, 'grayscale')

                if 'rot_transform' in info:
                    rot = info['rot_transform']
                    s_points[:, :3] = box_np_ops.rotation_points_single_angle(
                        s_points[:, :3], rot, axis=2)
                    # TODO: might need to rot 2d bbox in the future

                # the points of each sample already minus the object center
                # so this time it needs to add the offset back
                s_points[:, :3] += info['box3d_lidar'][:3]
                img = self.paste_obj(
                    img,
                    s_patch,
                    s_mask,
                    bbox_2d=info['box2d_camera'].astype(np.int32))

                count += 1
                s_points_list.append(s_points)

            ret = dict(
                img=img,
                gt_names=np.array([s['name'] for s in sampled]),
                difficulty=np.array([s['difficulty'] for s in sampled]),
                gt_bboxes_3d=sampled_gt_bboxes_3d,
                gt_bboxes_2d=sampled_gt_bboxes_2d,
                points=np.concatenate(s_points_list, axis=0),
                group_ids=np.arange(gt_bboxes_3d.shape[0],
                                    gt_bboxes_3d.shape[0] + len(sampled)))

        return ret

    def paste_obj(self, img, obj_img, obj_mask, bbox_2d):
        # paste the image patch back
        x1, y1, x2, y2 = bbox_2d
        # the bbox might exceed the img size because the img is different
        img_h, img_w = img.shape[:2]
        w = np.maximum(min(x2, img_w - 1) - x1 + 1, 1)
        h = np.maximum(min(y2, img_h - 1) - y1 + 1, 1)
        obj_mask = obj_mask[:h, :w]
        obj_img = obj_img[:h, :w]

        # choose a blend option
        if not self.blending_type:
            blending_op = 'none'
        else:
            blending_choice = np.random.randint(len(self.blending_type))
            blending_op = self.blending_type[blending_choice]

        if blending_op.find('poisson') != -1:
            # options: cv2.NORMAL_CLONE=1, or cv2.MONOCHROME_TRANSFER=3
            # cv2.MIXED_CLONE mixed the texture, thus is not used.
            if blending_op == 'poisson':
                mode = np.random.choice([1, 3], 1)[0]
            elif blending_op == 'poisson_normal':
                mode = cv2.NORMAL_CLONE
            elif blending_op == 'poisson_transfer':
                mode = cv2.MONOCHROME_TRANSFER
            else:
                raise NotImplementedError
            center = (int(x1 + w / 2), int(y1 + h / 2))
            img = cv2.seamlessClone(obj_img, img, obj_mask * 255, center, mode)
        else:
            if blending_op == 'gaussian':
                obj_mask = cv2.GaussianBlur(
                    obj_mask.astype(np.float32), (5, 5), 2)
            elif blending_op == 'box':
                obj_mask = cv2.blur(obj_mask.astype(np.float32), (3, 3))
            paste_mask = 1 - obj_mask
            img[y1:y1 + h, x1:x1 + w] = (
                img[y1:y1 + h, x1:x1 + w].astype(np.float32) *
                paste_mask[..., None]).astype(np.uint8)
            img[y1:y1 + h, x1:x1 + w] += (
                obj_img.astype(np.float32) *
                obj_mask[..., None]).astype(np.uint8)

        return img

    def sample_class_v2(self, name, num, gt_bboxes_3d, gt_bboxes_2d):
        sampled = self.sampler_dict[name].sample(num)
        sampled = copy.deepcopy(sampled)
        num_gt = gt_bboxes_3d.shape[0]
        num_sampled = len(sampled)

        # avoid collision in BEV first
        gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
            gt_bboxes_3d[:, 0:2], gt_bboxes_3d[:, 3:5], gt_bboxes_3d[:, 6])
        sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
        sp_boxes_bv = box_np_ops.center_to_corner_box2d(
            sp_boxes[:, 0:2], sp_boxes[:, 3:5], sp_boxes[:, 6])
        total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
        coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)

        # Then avoid collision in 2D space
        if self.check_2D_collision:
            sp_boxes_2d = np.stack([i['box2d_camera'] for i in sampled],
                                   axis=0)
            total_bbox_2d = np.concatenate([gt_bboxes_2d, sp_boxes_2d],
                                           axis=0)  # Nx4
            # random select a collision threshold
            if isinstance(self.collision_thr, float):
                collision_thr = self.collision_thr
            elif isinstance(self.collision_thr, list):
                collision_thr = np.random.choice(self.collision_thr)
            elif isinstance(self.collision_thr, dict):
                mode = self.collision_thr.get('mode', 'value')
                if mode == 'value':
                    collision_thr = np.random.choice(
                        self.collision_thr['thr_range'])
                elif mode == 'range':
                    collision_thr = np.random.uniform(
                        self.collision_thr['thr_range'][0],
                        self.collision_thr['thr_range'][1])

            if collision_thr == 0:
                # use similar collision test as BEV did
                # Nx4 (x1, y1, x2, y2) -> corners: Nx4x2
                # ((x1, y1), (x2, y1), (x1, y2), (x2, y2))
                x1y1 = total_bbox_2d[:, :2]
                x2y2 = total_bbox_2d[:, 2:]
                x1y2 = np.stack(
                    [total_bbox_2d[:, 0], total_bbox_2d[:, 3]], axis=-1)
                x2y1 = np.stack(
                    [total_bbox_2d[:, 2], total_bbox_2d[:, 1]], axis=-1)
                total_2d = np.stack([x1y1, x2y1, x1y2, x2y2], axis=1)
                coll_mat_2d = data_augment_utils.box_collision_test(
                    total_2d, total_2d)
            else:
                # use iof rather than iou to protect the foreground
                overlaps = box_np_ops.iou_jit(total_bbox_2d, total_bbox_2d,
                                              'iof')
                coll_mat_2d = overlaps > collision_thr
            coll_mat = coll_mat + coll_mat_2d

        diag = np.arange(total_bv.shape[0])
        coll_mat[diag, diag] = False

        valid_samples = []
        for i in range(num_gt, num_gt + num_sampled):
            if coll_mat[i].any():
                coll_mat[i] = False
                coll_mat[:, i] = False
            else:
                valid_samples.append(sampled[i - num_gt])
        return valid_samples
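The removed paste_obj above composites a sampled object patch onto the scene image with one of several blending modes ('box', 'gaussian', or Poisson seamless cloning). A self-contained sketch of the non-Poisson path, feathering the binary mask and alpha-blending it into the region of interest (inputs are assumed to be a uint8 image, a same-size uint8 patch, and a {0, 1} float mask):

import cv2
import numpy as np


def alpha_paste(img, patch, mask, x1, y1):
    # feather the hard {0, 1} mask so the pasted edge blends smoothly,
    # as the 'gaussian' option of the removed paste_obj does
    mask = cv2.GaussianBlur(mask.astype(np.float32), (5, 5), 2)
    h, w = patch.shape[:2]
    roi = img[y1:y1 + h, x1:x1 + w].astype(np.float32)
    blended = (roi * (1 - mask[..., None]) +
               patch.astype(np.float32) * mask[..., None])
    img[y1:y1 + h, x1:x1 + w] = blended.astype(np.uint8)
    return img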
mmdet3d/models/detectors/base.py

-from abc import ABCMeta, abstractmethod
-
-import torch.nn as nn
+from mmdet.models.detectors import BaseDetector


-class Base3DDetector(nn.Module, metaclass=ABCMeta):
+class Base3DDetector(BaseDetector):
     """Base class for detectors"""

-    def __init__(self):
-        super(Base3DDetector, self).__init__()
-        self.fp16_enabled = False
-
-    @property
-    def with_neck(self):
-        return hasattr(self, 'neck') and self.neck is not None
-
-    @property
-    def with_shared_head(self):
-        return hasattr(self, 'shared_head') and self.shared_head is not None
-
-    @property
-    def with_bbox(self):
-        return hasattr(self, 'bbox_head') and self.bbox_head is not None
-
-    @property
-    def with_mask(self):
-        return hasattr(self, 'mask_head') and self.mask_head is not None
-
-    @abstractmethod
-    def extract_feat(self, imgs):
-        pass
-
-    def extract_feats(self, imgs):
-        assert isinstance(imgs, list)
-        for img in imgs:
-            yield self.extract_feat(img)
-
-    @abstractmethod
-    def forward_train(self, **kwargs):
-        pass
-
-    @abstractmethod
-    def simple_test(self, **kwargs):
-        pass
-
-    @abstractmethod
-    def aug_test(self, **kwargs):
-        pass
-
-    def init_weights(self, pretrained=None):
-        if pretrained is not None:
-            from mmdet3d.utils import get_root_logger
-            logger = get_root_logger()
-            logger.info('load model from: {}'.format(pretrained))
-
-    def forward_test(self, points, img_metas, imgs=None, **kwargs):
+    def forward_test(self, points, img_metas, img=None, **kwargs):
         """
         Args:
             points (List[Tensor]): the outer list indicates test-time
@@ -62,7 +13,7 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
             img_metas (List[List[dict]]): the outer list indicates test-time
                 augs (multiscale, flip, etc.) and the inner list indicates
                 images in a batch
-            imgs (List[Tensor], optional): the outer list indicates test-time
+            img (List[Tensor], optional): the outer list indicates test-time
                 augmentations and inner Tensor should have a shape NxCxHxW,
                 which contains all images in the batch. Defaults to None.
         """
@@ -81,10 +32,10 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
         assert samples_per_gpu == 1

         if num_augs == 1:
-            imgs = [imgs] if imgs is None else imgs
-            return self.simple_test(points[0], img_metas[0], imgs[0], **kwargs)
+            img = [img] if img is None else img
+            return self.simple_test(points[0], img_metas[0], img[0], **kwargs)
         else:
-            return self.aug_test(points, img_metas, imgs, **kwargs)
+            return self.aug_test(points, img_metas, img, **kwargs)

     def forward(self, return_loss=True, **kwargs):
         """
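The refactor above removes roughly fifty lines because helpers such as with_bbox are now inherited from mmdet's BaseDetector instead of being redefined here. The inherited pattern is a plain attribute-presence check; a minimal standalone version for reference:

class Base:

    @property
    def with_bbox(self):
        # present-and-not-None check, in the style of mmdet's BaseDetector
        return hasattr(self, 'bbox_head') and self.bbox_head is not None


class Detector(Base):

    def __init__(self, bbox_head=None):
        self.bbox_head = bbox_head


assert Detector(bbox_head=object()).with_bbox
assert not Detector().with_bbox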
mmdet3d/models/detectors/mvx_two_stage.py

@@ -265,13 +265,13 @@ class MVXTwoStageDetector(Base3DDetector):
             proposal_list = proposals

-        # bbox head forward and loss
-        img_roi_losses = self.roi_head.forward_train(x, img_metas,
-                                                     proposal_list, gt_bboxes,
-                                                     gt_labels,
-                                                     gt_bboxes_ignore,
-                                                     **kwargs)
-        losses.update(img_roi_losses)
+        if self.with_img_bbox:
+            # bbox head forward and loss
+            img_roi_losses = self.img_roi_head.forward_train(
+                x, img_metas, proposal_list, gt_bboxes, gt_labels,
+                gt_bboxes_ignore, **kwargs)
+            losses.update(img_roi_losses)
         return losses

     def simple_test_img(self, x, img_metas, proposals=None, rescale=False):
mmdet3d/models/detectors/parta2.py

@@ -2,12 +2,17 @@ import torch
 import torch.nn.functional as F

 from mmdet3d.ops import Voxelization
-from mmdet.models import DETECTORS, TwoStageDetector
+from mmdet.models import DETECTORS
 from .. import builder
+from .two_stage import TwoStage3DDetector


 @DETECTORS.register_module()
-class PartA2(TwoStageDetector):
+class PartA2(TwoStage3DDetector):
+    """Part-A2 detector
+
+    Please refer to the `paper <https://arxiv.org/abs/1907.03670>`_
+    """

     def __init__(self,
                  voxel_layer,
@@ -111,41 +116,6 @@ class PartA2(TwoStage3DDetector):
         return losses

-    def forward_test(self, points, img_metas, imgs=None, **kwargs):
-        """
-        Args:
-            points (List[Tensor]): the outer list indicates test-time
-                augmentations and inner Tensor should have a shape NxC,
-                which contains all points in the batch.
-            img_metas (List[List[dict]]): the outer list indicates test-time
-                augs (multiscale, flip, etc.) and the inner list indicates
-                images in a batch
-        """
-        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:
-            if not isinstance(var, list):
-                raise TypeError('{} must be a list, but got {}'.format(
-                    name, type(var)))
-
-        num_augs = len(points)
-        if num_augs != len(img_metas):
-            raise ValueError(
-                'num of augmentations ({}) != num of image meta ({})'.format(
-                    len(points), len(img_metas)))
-        # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
-        samples_per_gpu = len(points[0])
-        assert samples_per_gpu == 1
-
-        if num_augs == 1:
-            return self.simple_test(points[0], img_metas[0], **kwargs)
-        else:
-            return self.aug_test(points, img_metas, **kwargs)
-
-    def forward(self, return_loss=True, **kwargs):
-        if return_loss:
-            return self.forward_train(**kwargs)
-        else:
-            return self.forward_test(**kwargs)
-
     def simple_test(self, points, img_metas, proposals=None, rescale=False):
         feats_dict, voxels_dict = self.extract_feat(points, img_metas)
@@ -159,6 +129,3 @@ class PartA2(TwoStage3DDetector):
         return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,
                                          proposal_list)
-
-    def aug_test(self, **kwargs):
-        raise NotImplementedError
mmdet3d/models/detectors/single_stage.py

@@ -6,6 +6,21 @@ from .base import Base3DDetector
 @DETECTORS.register_module()
 class SingleStage3DDetector(Base3DDetector):
+    """SingleStage3DDetector
+
+    This class serves as a base class for single-stage 3D detectors.
+
+    Args:
+        backbone (dict): Config dict of detector's backbone.
+        neck (dict, optional): Config dict of neck. Defaults to None.
+        bbox_head (dict, optional): Config dict of box head. Defaults to None.
+        train_cfg (dict, optional): Config dict of training hyper-parameters.
+            Defaults to None.
+        test_cfg (dict, optional): Config dict of test hyper-parameters.
+            Defaults to None.
+        pretrained (str, optional): Path of pretrained models.
+            Defaults to None.
+    """

     def __init__(self,
                  backbone,
mmdet3d/models/detectors/two_stage.py  (new file, 0 → 100644)

from mmdet.models import DETECTORS, TwoStageDetector

from .base import Base3DDetector


@DETECTORS.register_module()
class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
    """Base class of two-stage 3D detector

    It inherits original ``:class:TwoStageDetector`` and
    ``:class:Base3DDetector``. This class could serve as a base class for
    all two-stage 3D detectors.
    """

    def __init__(self, **kwargs):
        super(TwoStage3DDetector, self).__init__(**kwargs)
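Because TwoStage3DDetector lists Base3DDetector before TwoStageDetector, Python's MRO resolves any method defined on both parents to the 3D variant while the 2D-only helpers remain inherited. A toy demonstration of that lookup order (the classes here are stand-ins, not the real detectors):

class Base3D:  # stand-in for Base3DDetector

    def forward_test(self):
        return '3D forward_test'


class TwoStage2D:  # stand-in for mmdet's TwoStageDetector

    def forward_test(self):
        return '2D forward_test'

    def with_rpn(self):
        return True


class TwoStage3D(Base3D, TwoStage2D):
    pass


det = TwoStage3D()
assert det.forward_test() == '3D forward_test'  # the 3D override wins
assert det.with_rpn()  # 2D-only helpers are still inherited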
mmdet3d/models/fusion_layers/point_fusion.py

@@ -287,7 +287,7 @@ class PointFusion(nn.Module):
             pts.new_tensor(img_meta['pcd_rotation']) if 'pcd_rotation'
             in img_meta.keys() else torch.eye(3).type_as(pts).to(pts.device))
         img_scale_factor = (
-            img_meta['scale_factor']
+            pts.new_tensor(img_meta['scale_factor'][:2])
             if 'scale_factor' in img_meta.keys() else 1)
         pcd_flip = img_meta['pcd_flip'] if 'pcd_flip' in img_meta.keys(
         ) else False
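The one-line fix above turns the per-image scale factor into a tensor with the same dtype and device as the points and keeps only its first two components, so it can rescale projected 2D coordinates in a single broadcasted multiply. A minimal sketch of the effect (the metadata values are illustrative):

import torch

pts = torch.rand(5, 4)
img_meta = dict(scale_factor=[0.5, 0.5, 0.5, 0.5])
img_scale_factor = (
    pts.new_tensor(img_meta['scale_factor'][:2])
    if 'scale_factor' in img_meta.keys() else 1)
# an Nx2 tensor of projected pixel coordinates can now be rescaled directly
uv = torch.rand(5, 2) * 100
uv_scaled = uv * img_scale_factor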
requirements/runtime.txt

 matplotlib
-mmcv>=0.5.1
+mmcv>=0.6.0
 numba==0.48.0
 numpy
 # need older pillow until torchvision is fixed
tests/test_config.py

@@ -115,10 +115,8 @@ def test_config_data_pipeline():
     print('Found config_dpath = {!r}'.format(config_dpath))

     # Only tests a representative subset of configurations
-    # TODO: test pipelines using Albu, current Albu throw None given empty GT
     config_names = [
         'mvxnet/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py',
-        'mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py',
         'mvxnet/'
         'faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py',
     ]
tests/test_forward.py

@@ -82,10 +82,10 @@ def _test_two_stage_forward(cfg_file):
         gt_masks=gt_masks,
         return_loss=True)
     assert isinstance(losses, dict)
-    loss, _ = detector._parse_losses(losses)
-    loss.requires_grad_(True)
-    assert float(loss.item()) > 0
-    loss.backward()
+    from mmdet.apis.train import parse_losses
+    total_loss = parse_losses(losses)[0].requires_grad_(True)
+    assert float(total_loss.item()) > 0
+    total_loss.backward()

     # Test forward train with an empty truth batch
     mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
@@ -102,10 +102,9 @@ def _test_two_stage_forward(cfg_file):
         gt_masks=gt_masks,
         return_loss=True)
     assert isinstance(losses, dict)
-    loss, _ = detector._parse_losses(losses)
-    assert float(loss.item()) > 0
-    loss.backward()
+    from mmdet.apis.train import parse_losses
+    total_loss = parse_losses(losses)[0].requires_grad_(True)
+    assert float(total_loss.item()) > 0
+    total_loss.backward()

     # Test forward test
     with torch.no_grad():
@@ -140,6 +139,8 @@ def _test_single_stage_forward(cfg_file):
         gt_labels=gt_labels,
         return_loss=True)
     assert isinstance(losses, dict)
+    loss, _ = detector._parse_losses(losses)
+    assert float(loss.item()) > 0

     # Test forward test
     with torch.no_grad():
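The two-stage tests now reduce the raw loss dict through mmdet's parse_losses instead of the detector's private _parse_losses. As a rough sketch of what that reduction does (simplified; the real helper also assembles log_vars for logging), summing every loss term into one scalar looks like this:

import torch


def parse_losses_sketch(losses):
    # sum each tensor, or list of tensors, in the loss dict into one scalar
    return sum(
        v.mean() if isinstance(v, torch.Tensor) else sum(x.mean() for x in v)
        for v in losses.values())


losses = dict(loss_cls=torch.tensor(0.7), loss_bbox=[torch.tensor(0.2)])
assert float(parse_losses_sketch(losses)) > 0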
tools/train.py

@@ -12,11 +12,10 @@ from mmcv import Config, DictAction
 from mmcv.runner import init_dist

 from mmdet3d import __version__
-from mmdet3d.apis import train_detector
 from mmdet3d.datasets import build_dataset
 from mmdet3d.models import build_detector
 from mmdet3d.utils import collect_env, get_root_logger
-from mmdet.apis import set_random_seed
+from mmdet.apis import set_random_seed, train_detector


 def parse_args():