OpenDAS / mmdetection3d / Commits

Commit 3298db8a — "Add ori mvx"
Authored Jun 19, 2020 by zhangwenwei
Parent: 0ed9c576
Showing 20 changed files with 139 additions and 896 deletions (+139, −896).
.gitlab-ci.yml (+1, −1)
configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py (+22, −32)
configs/mvxnet/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py (+0, −206)
configs/mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py (+0, −141)
docs/api.rst (+0, −5)
mmdet3d/apis/__init__.py (+0, −3)
mmdet3d/apis/train.py (+0, −122)
mmdet3d/datasets/kitti_dataset.py (+51, −13)
mmdet3d/datasets/pipelines/__init__.py (+3, −3)
mmdet3d/datasets/pipelines/dbsampler.py (+0, −254)
mmdet3d/models/detectors/base.py (+7, −56)
mmdet3d/models/detectors/mvx_two_stage.py (+6, −6)
mmdet3d/models/detectors/parta2.py (+7, −40)
mmdet3d/models/detectors/single_stage.py (+15, −0)
mmdet3d/models/detectors/two_stage.py (+15, −0)
mmdet3d/models/fusion_layers/point_fusion.py (+1, −1)
requirements/runtime.txt (+1, −1)
tests/test_config.py (+0, −2)
tests/test_forward.py (+9, −8)
tools/train.py (+1, −2)
.gitlab-ci.yml

@@ -27,7 +27,7 @@ linting:
   stage: test
   script:
     - echo "Start building..."
-    - pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
+    - pip install "git+https://github.com/open-mmlab/cocoapi.git#subdirectory=pycocotools"
    - pip install git+https://github.com/open-mmlab/mmcv.git
    - pip install git+https://github.com/open-mmlab/mmdetection.git
    - python -c "import mmdet; print(mmdet.__version__)"
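The only functional change here swaps the cocoapi install from the upstream cocodataset repository (whose setup lives in the PythonAPI subdirectory) to the open-mmlab fork (pycocotools subdirectory). A quick local sanity check of the resulting install might look like this (a sketch, not part of the CI):

    # Minimal sanity check: verify the fork installed correctly and exposes
    # the COCO API that mmdet relies on.
    import pycocotools
    from pycocotools.coco import COCO  # raises ImportError if the install failed

    print(pycocotools.__file__)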
configs/mvxnet/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py → configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py (renamed)
@@ -4,8 +4,6 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 model = dict(
     type='DynamicMVXFasterRCNN',
-    pretrained=('./pretrain_detectron/'
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
     img_backbone=dict(
         type='ResNet',
         depth=50,

@@ -136,22 +134,10 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
 img_norm_cfg = dict(
     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 input_modality = dict(use_lidar=True, use_camera=True)
-db_sampler = dict(
-    type='MMDataBaseSampler',
-    data_root=data_root,
-    info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    blending_type=['box', 'gaussian', 'poisson'],
-    depth_consistent=True,
-    check_2D_collision=True,
-    collision_thr=[0, 0.3, 0.5, 0.7],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
-    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
-    classes=class_names)
 train_pipeline = [
+    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
         type='Resize',
         img_scale=[(640, 192), (2560, 768)],

@@ -171,10 +157,11 @@ train_pipeline = [
     dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
         type='Collect3D',
-        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
+        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(type='LoadImageFromFile'),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1280, 384),

@@ -196,7 +183,7 @@ test_pipeline = [
             type='DefaultFormatBundle3D',
             class_names=class_names,
             with_label=False),
-        dict(type='Collect3D', keys=['points'])
+        dict(type='Collect3D', keys=['points', 'img'])
     ])
 ]

@@ -204,15 +191,18 @@ data = dict(
     samples_per_gpu=2,
     workers_per_gpu=2,
     train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=train_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=False),
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            ann_file=data_root + 'kitti_infos_train.pkl',
+            split='training',
+            pts_prefix='velodyne_reduced',
+            pipeline=train_pipeline,
+            modality=input_modality,
+            classes=class_names,
+            test_mode=False)),
     val=dict(
         type=dataset_type,
         data_root=data_root,

@@ -255,10 +245,10 @@ log_config = dict(
 # yapf:enable
 evaluation = dict(interval=1)
 # runtime settings
-total_epochs = 80
+total_epochs = 40
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
+work_dir = None
-load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_1x_coco-3-class_44.7_20200205-b1c1533f.pth'  # noqa
+load_from = './pretrain_mmdet/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa
 resume_from = None
 workflow = [('train', 1)]
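Note the training-set change above: KITTI is now wrapped in a RepeatDataset with times=2 while total_epochs drops from 80 to 40, so the total number of training iterations stays roughly constant but epoch-boundary overhead halves. A minimal sketch of the repeat-wrapper idea (modeled on the mmdet wrapper of the same name; not the verbatim implementation):

    # Sketch of what RepeatDataset does conceptually: indices wrap around,
    # so one "epoch" of the wrapper visits the inner dataset `times` times.
    class RepeatDatasetSketch:

        def __init__(self, dataset, times):
            self.dataset = dataset
            self.times = times

        def __len__(self):
            return self.times * len(self.dataset)

        def __getitem__(self, idx):
            return self.dataset[idx % len(self.dataset)]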
configs/mvxnet/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py (deleted, 100644 → 0)

# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='FasterRCNN',
    pretrained='open-mmlab://regnetx_3.2gf',
    backbone=dict(
        type='RegNet',
        arch='regnetx_3.2gf',
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        base_channels=32,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[96, 192, 432, 1008],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=10,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        # following the setting of detectron,
        # which improves ~0.2 bbox mAP.
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
           'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
    # The mean and std is used in PyCls when training RegNets
    mean=[103.53, 116.28, 123.675],
    std=[57.375, 57.12, 58.395],
    to_rgb=False)
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/': 's3://nuscenes/nuscenes/',
        'data/nuscenes/': 's3://nuscenes/nuscenes/'
    }))
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=False,
        file_client_args=file_client_args),
    dict(
        type='Resize',
        img_scale=(1280, 720),
        ratio_range=(0.75, 1.25),
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1280, 720),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth'  # noqa
resume_from = None
workflow = [('train', 1)]
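This nuScenes 2D Faster R-CNN config is removed outright. For reference, configs like the ones in this commit are plain Python modules consumed through mmcv; a minimal loading sketch, assuming mmcv's standard Config API:

    from mmcv import Config

    # Load a config file into an attribute-accessible dict.
    cfg = Config.fromfile(
        'configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py')
    print(cfg.model.type)           # -> 'DynamicMVXFasterRCNN' (per the diff above)
    print(cfg.data.samples_per_gpu)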
configs/mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py (deleted, 100644 → 0)

# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='RetinaNet',
    pretrained=('open-mmlab://resnet50_caffe_bgr'),
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,
        num_outs=5),
    bbox_head=dict(
        type='RetinaHead',
        num_classes=10,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),
    allowed_border=-1,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_thr=0.5),
    max_per_img=100)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=(1600, 900),
        ratio_range=(0.8, 1.2),
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1600, 900),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/retinanet_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
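A note on the loss settings in the deleted config above: gamma=2.0 and alpha=0.25 are the standard focal-loss settings from the RetinaNet paper, where classification loss is down-weighted for well-classified examples:

    FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t)

Relative to plain cross-entropy, an example with p_t = 0.9 is modulated by (1 - 0.9)^2 = 0.01, i.e. contributes 100x less, which is what lets the single-stage head cope with the extreme foreground/background imbalance.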
docs/api.rst

 API Documentation
 =================
 
-mmdet3d.apis
---------------
-.. automodule:: mmdet3d.apis
-    :members:
-
 mmdet3d.core
 --------------
mmdet3d/apis/__init__.py (deleted, 100644 → 0)

from .train import batch_processor, train_detector

__all__ = ['batch_processor', 'train_detector']
mmdet3d/apis/train.py (deleted, 100644 → 0)

import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner, build_optimizer

from mmdet3d.utils import get_root_logger
from mmdet.apis.train import parse_losses
from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
                        Fp16OptimizerHook)
from mmdet.datasets import build_dataloader, build_dataset


def batch_processor(model, data, train_mode):
    """Process a data batch.

    This method is required as an argument of Runner, which defines how to
    process a data batch and obtain proper outputs. The first 3 arguments of
    batch_processor are fixed.

    Args:
        model (nn.Module): A PyTorch model.
        data (dict): The data batch in a dict.
        train_mode (bool): Training mode or not. It may be useless for some
            models.

    Returns:
        dict: A dict containing losses and log vars.
    """
    losses = model(**data)
    loss, log_vars = parse_losses(losses)

    if 'img_metas' in data:
        num_samples = len(data['img_metas'].data)
    else:
        num_samples = len(data['img'].data)
    outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)

    return outputs


def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model,
        batch_processor,
        optimizer,
        cfg.work_dir,
        logger=logger,
        meta=meta)
    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
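With this custom training loop deleted, training goes through mmdet.apis.train_detector instead (see the tools/train.py change further down), which builds the runner and hooks itself. A hedged sketch of the replacement call path, with argument names following the mmdet 2.x API and `cfg` assumed to be a loaded config:

    # Sketch of the call path after this commit (mirrors tools/train.py).
    from mmdet.apis import train_detector
    from mmdet3d.datasets import build_dataset
    from mmdet3d.models import build_detector

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    datasets = [build_dataset(cfg.data.train)]
    # mmdet's helper wires up the runner, optimizer and eval hooks internally.
    train_detector(model, datasets, cfg, distributed=False, validate=True)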
mmdet3d/datasets/kitti_dataset.py

@@ -52,7 +52,7 @@ class KittiDataset(Custom3DDataset):
     def get_data_info(self, index):
         info = self.data_infos[index]
         sample_idx = info['image']['image_idx']
-        img_filename = os.path.join(self.root_split,
+        img_filename = os.path.join(self.data_root,
                                     info['image']['image_path'])

         # TODO: consider use torch.Tensor only

@@ -65,7 +65,8 @@ class KittiDataset(Custom3DDataset):
         input_dict = dict(
             sample_idx=sample_idx,
             pts_filename=pts_filename,
-            img_filename=img_filename,
+            img_prefix=None,
+            img_info=dict(filename=img_filename),
             lidar2img=lidar2img)

         if not self.test_mode:

@@ -113,8 +114,8 @@ class KittiDataset(Custom3DDataset):
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
             gt_labels_3d=gt_labels_3d,
-            gt_bboxes=gt_bboxes,
-            gt_labels=gt_labels)
+            bboxes=gt_bboxes,
+            labels=gt_labels)
         return anns_results

     def drop_arrays_by_name(self, gt_names, used_classes):

@@ -151,6 +152,24 @@ class KittiDataset(Custom3DDataset):
             result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
                                                     pklfile_prefix,
                                                     submission_prefix)
+        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:
+            result_files = dict()
+            for name in outputs[0]:
+                results_ = [out[name] for out in outputs]
+                pklfile_prefix_ = pklfile_prefix + name
+                if submission_prefix is not None:
+                    submission_prefix_ = submission_prefix + name
+                else:
+                    submission_prefix_ = None
+                if 'img' in name:
+                    result_files = self.bbox2result_kitti2d(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                else:
+                    result_files_ = self.bbox2result_kitti(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                result_files[name] = result_files_
         else:
             result_files = self.bbox2result_kitti(outputs, self.CLASSES,
                                                   pklfile_prefix,

@@ -162,8 +181,7 @@ class KittiDataset(Custom3DDataset):
                  metric=None,
                  logger=None,
                  pklfile_prefix=None,
-                 submission_prefix=None,
-                 result_names=['pts_bbox']):
+                 submission_prefix=None):
         """Evaluation in KITTI protocol.

         Args:

@@ -178,18 +196,38 @@ class KittiDataset(Custom3DDataset):
             If not specified, the submission data will not be generated.

         Returns:
-            dict[str: float]
+            dict[str: float]: results of each evaluation metric
         """
         result_files, tmp_dir = self.format_results(results, pklfile_prefix)
         from mmdet3d.core.evaluation import kitti_eval
         gt_annos = [info['annos'] for info in self.data_infos]
-        if metric == 'img_bbox':
-            ap_result_str, ap_dict = kitti_eval(
-                gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
+        if isinstance(result_files, dict):
+            ap_dict = dict()
+            for name, result_files_ in result_files.items():
+                eval_types = ['bbox', 'bev', '3d']
+                if 'img' in name:
+                    eval_types = ['bbox']
+                ap_result_str, ap_dict_ = kitti_eval(
+                    gt_annos, result_files_, self.CLASSES,
+                    eval_types=eval_types)
+                for ap_type, ap in ap_dict_.items():
+                    ap_dict[f'{name}/{ap_type}'] = float(
+                        '{:.4f}'.format(ap))
+                print_log(
+                    f'Results of {name}:\n' + ap_result_str, logger=logger)
         else:
-            ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
-                                                self.CLASSES)
-        print_log('\n' + ap_result_str, logger=logger)
+            if metric == 'img_bbox':
+                ap_result_str, ap_dict = kitti_eval(
+                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
+            else:
+                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
                                                     self.CLASSES)
+            print_log('\n' + ap_result_str, logger=logger)
         if tmp_dir is not None:
             tmp_dir.cleanup()
         return ap_dict
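The rewritten format_results/evaluate pair supports multi-modality detectors that return one result dict per sample, keyed by branch. An illustration of the expected shape (hypothetical field values; the branch keys come from the diff above):

    # Hypothetical shape of `outputs` for a two-branch MVX-style detector:
    # one dict per test sample, one entry per detection branch.
    outputs = [
        dict(
            pts_bbox=dict(boxes_3d=..., scores_3d=..., labels_3d=...),
            img_bbox=dict(boxes_3d=..., scores_3d=..., labels_3d=...)),
        # ... one dict per remaining test sample
    ]
    # format_results writes one KITTI-format result set per branch, and
    # evaluate reports metrics under 'pts_bbox/<ap_type>' / 'img_bbox/<ap_type>'.

One caution: as reconstructed, the 'img' branch assigns its output to result_files rather than result_files_, which would overwrite the accumulating dict; that looks like a typo carried in the commit itself rather than an extraction artifact.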
mmdet3d/datasets/pipelines/__init__.py

 from mmdet.datasets.pipelines import Compose
-from .dbsampler import DataBaseSampler, MMDataBaseSampler
+from .dbsampler import DataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScaleTrans,
                              IndoorPointsColorJitter)

@@ -19,6 +19,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScaleTrans', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
+    'NormalizePointsColor', 'LoadAnnotations3D',
     'IndoorPointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D'
 ]
mmdet3d/datasets/pipelines/dbsampler.py

@@ -2,8 +2,6 @@ import copy
 import os
 import pickle
-import cv2
-import mmcv
 import numpy as np
 from mmdet3d.core.bbox import box_np_ops

@@ -263,255 +261,3 @@ class DataBaseSampler(object):
                     boxes[i, -1] - sp_boxes[i - num_gt, -1])
                 valid_samples.append(sampled[i - num_gt])
         return valid_samples

The entire MMDataBaseSampler class that followed is removed:

@OBJECTSAMPLERS.register_module()
class MMDataBaseSampler(DataBaseSampler):

    def __init__(self,
                 info_path,
                 data_root,
                 rate,
                 prepare,
                 object_rot_range,
                 sample_groups,
                 classes=None,
                 check_2D_collision=False,
                 collision_thr=0,
                 collision_in_classes=False,
                 depth_consistent=False,
                 blending_type=None):
        super(MMDataBaseSampler, self).__init__(
            info_path=info_path,
            data_root=data_root,
            rate=rate,
            prepare=prepare,
            object_rot_range=object_rot_range,
            sample_groups=sample_groups,
            classes=classes)
        self.blending_type = blending_type
        self.depth_consistent = depth_consistent
        self.check_2D_collision = check_2D_collision
        self.collision_thr = collision_thr
        self.collision_in_classes = collision_in_classes

    def sample_all(self, gt_bboxes_3d, gt_names, gt_bboxes_2d=None, img=None):
        sampled_num_dict = {}
        sample_num_per_class = []
        for class_name, max_sample_num in zip(self.sample_classes,
                                              self.sample_max_nums):
            sampled_num = int(max_sample_num -
                              np.sum([n == class_name for n in gt_names]))
            sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
            sampled_num_dict[class_name] = sampled_num
            sample_num_per_class.append(sampled_num)

        sampled = []
        sampled_gt_bboxes_3d = []
        sampled_gt_bboxes_2d = []
        avoid_coll_boxes_3d = gt_bboxes_3d
        avoid_coll_boxes_2d = gt_bboxes_2d

        for class_name, sampled_num in zip(self.sample_classes,
                                           sample_num_per_class):
            if sampled_num > 0:
                sampled_cls = self.sample_class_v2(class_name, sampled_num,
                                                   avoid_coll_boxes_3d,
                                                   avoid_coll_boxes_2d)

                sampled += sampled_cls
                if len(sampled_cls) > 0:
                    if len(sampled_cls) == 1:
                        sampled_gt_box_3d = sampled_cls[0]['box3d_lidar'][
                            np.newaxis, ...]
                        sampled_gt_box_2d = sampled_cls[0]['box2d_camera'][
                            np.newaxis, ...]
                    else:
                        sampled_gt_box_3d = np.stack(
                            [s['box3d_lidar'] for s in sampled_cls], axis=0)
                        sampled_gt_box_2d = np.stack(
                            [s['box2d_camera'] for s in sampled_cls], axis=0)

                    sampled_gt_bboxes_3d += [sampled_gt_box_3d]
                    sampled_gt_bboxes_2d += [sampled_gt_box_2d]
                    if self.collision_in_classes:
                        # TODO: check whether collision check among
                        # classes is necessary
                        avoid_coll_boxes_3d = np.concatenate(
                            [avoid_coll_boxes_3d, sampled_gt_box_3d], axis=0)
                        avoid_coll_boxes_2d = np.concatenate(
                            [avoid_coll_boxes_2d, sampled_gt_box_2d], axis=0)

        ret = None
        if len(sampled) > 0:
            sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
            sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)

            s_points_list = []
            count = 0

            if self.depth_consistent:
                # change the paster order based on distance
                center = sampled_gt_bboxes_3d[:, 0:3]
                paste_order = np.argsort(
                    -np.power(np.sum(np.power(center, 2), axis=-1), 1 / 2),
                    axis=-1)

            for idx in range(len(sampled)):
                if self.depth_consistent:
                    inds = np.where(paste_order == idx)[0][0]
                    info = sampled[inds]
                else:
                    info = sampled[idx]

                pcd_file_path = os.path.join(
                    self.data_root,
                    info['path']) if self.data_root else info['path']
                img_file_path = pcd_file_path + '.png'
                mask_file_path = pcd_file_path + '.mask.png'

                s_points = np.fromfile(
                    pcd_file_path, dtype=np.float32).reshape([-1, 4])
                s_patch = mmcv.imread(img_file_path)
                s_mask = mmcv.imread(mask_file_path, 'grayscale')

                if 'rot_transform' in info:
                    rot = info['rot_transform']
                    s_points[:, :3] = box_np_ops.rotation_points_single_angle(
                        s_points[:, :3], rot, axis=2)
                    # TODO: might need to rot 2d bbox in the future

                # the points of each sample already minus the object center
                # so this time it needs to add the offset back
                s_points[:, :3] += info['box3d_lidar'][:3]
                img = self.paste_obj(
                    img,
                    s_patch,
                    s_mask,
                    bbox_2d=info['box2d_camera'].astype(np.int32))
                count += 1
                s_points_list.append(s_points)

            ret = dict(
                img=img,
                gt_names=np.array([s['name'] for s in sampled]),
                difficulty=np.array([s['difficulty'] for s in sampled]),
                gt_bboxes_3d=sampled_gt_bboxes_3d,
                gt_bboxes_2d=sampled_gt_bboxes_2d,
                points=np.concatenate(s_points_list, axis=0),
                group_ids=np.arange(gt_bboxes_3d.shape[0],
                                    gt_bboxes_3d.shape[0] + len(sampled)))

        return ret

    def paste_obj(self, img, obj_img, obj_mask, bbox_2d):
        # paste the image patch back
        x1, y1, x2, y2 = bbox_2d
        # the bbox might exceed the img size because the img is different
        img_h, img_w = img.shape[:2]
        w = np.maximum(min(x2, img_w - 1) - x1 + 1, 1)
        h = np.maximum(min(y2, img_h - 1) - y1 + 1, 1)
        obj_mask = obj_mask[:h, :w]
        obj_img = obj_img[:h, :w]

        # choose a blend option
        if not self.blending_type:
            blending_op = 'none'
        else:
            blending_choice = np.random.randint(len(self.blending_type))
            blending_op = self.blending_type[blending_choice]

        if blending_op.find('poisson') != -1:
            # options: cv2.NORMAL_CLONE=1, or cv2.MONOCHROME_TRANSFER=3
            # cv2.MIXED_CLONE mixed the texture, thus is not used.
            if blending_op == 'poisson':
                mode = np.random.choice([1, 3], 1)[0]
            elif blending_op == 'poisson_normal':
                mode = cv2.NORMAL_CLONE
            elif blending_op == 'poisson_transfer':
                mode = cv2.MONOCHROME_TRANSFER
            else:
                raise NotImplementedError
            center = (int(x1 + w / 2), int(y1 + h / 2))
            img = cv2.seamlessClone(obj_img, img, obj_mask * 255, center, mode)
        else:
            if blending_op == 'gaussian':
                obj_mask = cv2.GaussianBlur(
                    obj_mask.astype(np.float32), (5, 5), 2)
            elif blending_op == 'box':
                obj_mask = cv2.blur(obj_mask.astype(np.float32), (3, 3))
            paste_mask = 1 - obj_mask
            img[y1:y1 + h, x1:x1 + w] = (
                img[y1:y1 + h, x1:x1 + w].astype(np.float32) *
                paste_mask[..., None]).astype(np.uint8)
            img[y1:y1 + h, x1:x1 + w] += (
                obj_img.astype(np.float32) * obj_mask[..., None]).astype(
                    np.uint8)

        return img

    def sample_class_v2(self, name, num, gt_bboxes_3d, gt_bboxes_2d):
        sampled = self.sampler_dict[name].sample(num)
        sampled = copy.deepcopy(sampled)
        num_gt = gt_bboxes_3d.shape[0]
        num_sampled = len(sampled)

        # avoid collision in BEV first
        gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
            gt_bboxes_3d[:, 0:2], gt_bboxes_3d[:, 3:5], gt_bboxes_3d[:, 6])
        sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
        sp_boxes_bv = box_np_ops.center_to_corner_box2d(
            sp_boxes[:, 0:2], sp_boxes[:, 3:5], sp_boxes[:, 6])
        total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
        coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)

        # Then avoid collision in 2D space
        if self.check_2D_collision:
            sp_boxes_2d = np.stack([i['box2d_camera'] for i in sampled],
                                   axis=0)
            total_bbox_2d = np.concatenate([gt_bboxes_2d, sp_boxes_2d],
                                           axis=0)  # Nx4
            # random select a collision threshold
            if isinstance(self.collision_thr, float):
                collision_thr = self.collision_thr
            elif isinstance(self.collision_thr, list):
                collision_thr = np.random.choice(self.collision_thr)
            elif isinstance(self.collision_thr, dict):
                mode = self.collision_thr.get('mode', 'value')
                if mode == 'value':
                    collision_thr = np.random.choice(
                        self.collision_thr['thr_range'])
                elif mode == 'range':
                    collision_thr = np.random.uniform(
                        self.collision_thr['thr_range'][0],
                        self.collision_thr['thr_range'][1])

            if collision_thr == 0:
                # use similar collision test as BEV did
                # Nx4 (x1, y1, x2, y2) -> corners: Nx4x2
                # ((x1, y1), (x2, y1), (x1, y2), (x2, y2))
                x1y1 = total_bbox_2d[:, :2]
                x2y2 = total_bbox_2d[:, 2:]
                x1y2 = np.stack([total_bbox_2d[:, 0], total_bbox_2d[:, 3]],
                                axis=-1)
                x2y1 = np.stack([total_bbox_2d[:, 2], total_bbox_2d[:, 1]],
                                axis=-1)
                total_2d = np.stack([x1y1, x2y1, x1y2, x2y2], axis=1)
                coll_mat_2d = data_augment_utils.box_collision_test(
                    total_2d, total_2d)
            else:
                # use iof rather than iou to protect the foreground
                overlaps = box_np_ops.iou_jit(total_bbox_2d, total_bbox_2d,
                                              'iof')
                coll_mat_2d = overlaps > collision_thr
            coll_mat = coll_mat + coll_mat_2d

        diag = np.arange(total_bv.shape[0])
        coll_mat[diag, diag] = False

        valid_samples = []
        for i in range(num_gt, num_gt + num_sampled):
            if coll_mat[i].any():
                coll_mat[i] = False
                coll_mat[:, i] = False
            else:
                valid_samples.append(sampled[i - num_gt])
        return valid_samples
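The deleted paste_obj above blends a sampled object patch into the scene image with either Poisson seamless cloning or a blurred soft mask. A standalone sketch of the soft-mask branch, using only numpy/cv2 and hypothetical inputs:

    import cv2
    import numpy as np

    def alpha_paste(img, patch, mask, x1, y1):
        """Sketch of the 'gaussian' blending branch from the removed code:
        feather the binary mask, then alpha-composite the patch onto img.
        `patch` and `mask` share the same HxW; values of `mask` are 0/1."""
        h, w = patch.shape[:2]
        soft = cv2.GaussianBlur(mask.astype(np.float32), (5, 5), 2)
        roi = img[y1:y1 + h, x1:x1 + w].astype(np.float32)
        blended = (roi * (1 - soft[..., None]) +
                   patch.astype(np.float32) * soft[..., None])
        img[y1:y1 + h, x1:x1 + w] = blended.astype(np.uint8)
        return img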
mmdet3d/models/detectors/base.py

-from abc import ABCMeta, abstractmethod
-
-import torch.nn as nn
+from mmdet.models.detectors import BaseDetector


-class Base3DDetector(nn.Module, metaclass=ABCMeta):
+class Base3DDetector(BaseDetector):
     """Base class for detectors"""

-    def __init__(self):
-        super(Base3DDetector, self).__init__()
-        self.fp16_enabled = False
-
-    @property
-    def with_neck(self):
-        return hasattr(self, 'neck') and self.neck is not None
-
-    @property
-    def with_shared_head(self):
-        return hasattr(self, 'shared_head') and self.shared_head is not None
-
-    @property
-    def with_bbox(self):
-        return hasattr(self, 'bbox_head') and self.bbox_head is not None
-
-    @property
-    def with_mask(self):
-        return hasattr(self, 'mask_head') and self.mask_head is not None
-
-    @abstractmethod
-    def extract_feat(self, imgs):
-        pass
-
-    def extract_feats(self, imgs):
-        assert isinstance(imgs, list)
-        for img in imgs:
-            yield self.extract_feat(img)
-
-    @abstractmethod
-    def forward_train(self, **kwargs):
-        pass
-
-    @abstractmethod
-    def simple_test(self, **kwargs):
-        pass
-
-    @abstractmethod
-    def aug_test(self, **kwargs):
-        pass
-
-    def init_weights(self, pretrained=None):
-        if pretrained is not None:
-            from mmdet3d.utils import get_root_logger
-            logger = get_root_logger()
-            logger.info('load model from: {}'.format(pretrained))
-
-    def forward_test(self, points, img_metas, imgs=None, **kwargs):
+    def forward_test(self, points, img_metas, img=None, **kwargs):
         """
         Args:
             points (List[Tensor]): the outer list indicates test-time

@@ -62,7 +13,7 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
             img_metas (List[List[dict]]): the outer list indicates test-time
                 augs (multiscale, flip, etc.) and the inner list indicates
                 images in a batch
-            imgs (List[Tensor], optional): the outer list indicates test-time
+            img (List[Tensor], optional): the outer list indicates test-time
                 augmentations and inner Tensor should have a shape NxCxHxW,
                 which contains all images in the batch. Defaults to None.
         """

@@ -81,10 +32,10 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
         assert samples_per_gpu == 1

         if num_augs == 1:
-            imgs = [imgs] if imgs is None else imgs
-            return self.simple_test(points[0], img_metas[0], imgs[0],
-                                    **kwargs)
+            img = [img] if img is None else img
+            return self.simple_test(points[0], img_metas[0], img[0], **kwargs)
         else:
-            return self.aug_test(points, img_metas, imgs, **kwargs)
+            return self.aug_test(points, img_metas, img, **kwargs)

     def forward(self, return_loss=True, **kwargs):
         """
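Inheriting mmdet's BaseDetector is what permits deleting roughly fifty lines of boilerplate: the with_neck/with_bbox capability properties, the abstract-method declarations, and init_weights all come from the parent, so the 3D base class only overrides forward_test to accept point clouds. A hypothetical subclass then looks like this (a sketch of the resulting division of labor; My3DDetector and the elided bodies are illustrative, not from the repository):

    from mmdet.models.detectors import BaseDetector

    class My3DDetector(BaseDetector):      # hypothetical subclass
        def extract_feat(self, imgs):
            ...                             # required by BaseDetector
        def forward_train(self, points, img_metas, **kwargs):
            ...
        def simple_test(self, points, img_metas, img=None, **kwargs):
            ...
        def aug_test(self, points, img_metas, imgs=None, **kwargs):
            ...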
mmdet3d/models/detectors/mvx_two_stage.py

@@ -265,13 +265,13 @@ class MVXTwoStageDetector(Base3DDetector):
             proposal_list = proposals

-        # bbox head forward and loss
-        img_roi_losses = self.roi_head.forward_train(x, img_metas,
-                                                     proposal_list, gt_bboxes,
-                                                     gt_labels,
-                                                     gt_bboxes_ignore,
-                                                     **kwargs)
+        if self.with_img_bbox:
+            # bbox head forward and loss
+            img_roi_losses = self.img_roi_head.forward_train(
+                x, img_metas, proposal_list, gt_bboxes, gt_labels,
+                gt_bboxes_ignore, **kwargs)
         losses.update(img_roi_losses)
         return losses

     def simple_test_img(self, x, img_metas, proposals=None, rescale=False):
mmdet3d/models/detectors/parta2.py

@@ -2,12 +2,17 @@ import torch
 import torch.nn.functional as F

 from mmdet3d.ops import Voxelization
-from mmdet.models import DETECTORS, TwoStageDetector
+from mmdet.models import DETECTORS
 from .. import builder
+from .two_stage import TwoStage3DDetector


 @DETECTORS.register_module()
-class PartA2(TwoStageDetector):
+class PartA2(TwoStage3DDetector):
+    """Part-A2 detector.
+
+    Please refer to the `paper <https://arxiv.org/abs/1907.03670>`_
+    """

     def __init__(self,
                  voxel_layer,

@@ -111,41 +116,6 @@ class PartA2(TwoStage3DDetector):
         return losses

-    def forward_test(self, points, img_metas, imgs=None, **kwargs):
-        """
-        Args:
-            points (List[Tensor]): the outer list indicates test-time
-                augmentations and inner Tensor should have a shape NxC,
-                which contains all points in the batch.
-            img_metas (List[List[dict]]): the outer list indicates test-time
-                augs (multiscale, flip, etc.) and the inner list indicates
-                images in a batch
-        """
-        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:
-            if not isinstance(var, list):
-                raise TypeError('{} must be a list, but got {}'.format(
-                    name, type(var)))
-
-        num_augs = len(points)
-        if num_augs != len(img_metas):
-            raise ValueError(
-                'num of augmentations ({}) != num of image meta ({})'.format(
-                    len(points), len(img_metas)))
-        # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
-        samples_per_gpu = len(points[0])
-        assert samples_per_gpu == 1
-
-        if num_augs == 1:
-            return self.simple_test(points[0], img_metas[0], **kwargs)
-        else:
-            return self.aug_test(points, img_metas, **kwargs)
-
-    def forward(self, return_loss=True, **kwargs):
-        if return_loss:
-            return self.forward_train(**kwargs)
-        else:
-            return self.forward_test(**kwargs)
-
     def simple_test(self, points, img_metas, proposals=None, rescale=False):
         feats_dict, voxels_dict = self.extract_feat(points, img_metas)

@@ -159,6 +129,3 @@ class PartA2(TwoStage3DDetector):
         return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,
                                          proposal_list)
-
-    def aug_test(self, **kwargs):
-        raise NotImplementedError
mmdet3d/models/detectors/single_stage.py

@@ -6,6 +6,21 @@ from .base import Base3DDetector

 @DETECTORS.register_module()
 class SingleStage3DDetector(Base3DDetector):
+    """SingleStage3DDetector.
+
+    This class serves as a base class for single-stage 3D detectors.
+
+    Args:
+        backbone (dict): Config dict of detector's backbone.
+        neck (dict, optional): Config dict of neck. Defaults to None.
+        bbox_head (dict, optional): Config dict of box head. Defaults to None.
+        train_cfg (dict, optional): Config dict of training hyper-parameters.
+            Defaults to None.
+        test_cfg (dict, optional): Config dict of test hyper-parameters.
+            Defaults to None.
+        pretrained (str, optional): Path of pretrained models.
+            Defaults to None.
+    """

     def __init__(self,
                  backbone,
mmdet3d/models/detectors/two_stage.py (new file, 0 → 100644)

from mmdet.models import DETECTORS, TwoStageDetector

from .base import Base3DDetector


@DETECTORS.register_module()
class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
    """Base class of two-stage 3D detector.

    It inherits original ``:class:TwoStageDetector`` and
    ``:class:Base3DDetector``. This class could serve as a base class for
    all two-stage 3D detectors.
    """

    def __init__(self, **kwargs):
        super(TwoStage3DDetector, self).__init__(**kwargs)
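TwoStage3DDetector(Base3DDetector, TwoStageDetector) leans on Python's method resolution order: attributes are looked up on Base3DDetector first, so its point-cloud-aware forward_test shadows the 2D one, while the RPN/RoI plumbing falls through to TwoStageDetector. A minimal demonstration of the lookup order, with stand-in class names:

    # C3 linearization for the diamond above (both parents ultimately derive
    # from mmdet's BaseDetector, played here by A).
    class A: pass          # stand-in for BaseDetector
    class B(A): pass       # stand-in for Base3DDetector
    class C(A): pass       # stand-in for TwoStageDetector
    class D(B, C): pass    # stand-in for TwoStage3DDetector

    print([cls.__name__ for cls in D.__mro__])
    # -> ['D', 'B', 'C', 'A', 'object']: B's overrides win over C's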
mmdet3d/models/fusion_layers/point_fusion.py

@@ -287,7 +287,7 @@ class PointFusion(nn.Module):
             pts.new_tensor(img_meta['pcd_rotation']) if 'pcd_rotation'
             in img_meta.keys() else torch.eye(3).type_as(pts).to(pts.device))
         img_scale_factor = (
-            img_meta['scale_factor']
+            pts.new_tensor(img_meta['scale_factor'][:2])
             if 'scale_factor' in img_meta.keys() else 1)
         pcd_flip = img_meta['pcd_flip'] if 'pcd_flip' in img_meta.keys(
         ) else False
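The fix wraps the scale factor in pts.new_tensor(...) so it lives on the same device and with the same dtype as the point tensor before it rescales projected coordinates; indexing [:2] keeps only the x/y factors. A tiny illustration with hypothetical values:

    import torch

    pts = torch.rand(100, 3)                 # hypothetical point tensor
    scale_factor = [0.5, 0.5, 0.5, 0.5]      # mmcv-style (w, h, w, h) factors
    img_scale_factor = pts.new_tensor(scale_factor[:2])
    # -> shape (2,) tensor carrying pts.dtype and pts.device, safe to multiply
    #    against projected point coordinates without a device mismatch.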
requirements/runtime.txt

 matplotlib
-mmcv>=0.5.1
+mmcv>=0.6.0
 numba==0.48.0
 numpy
 # need older pillow until torchvision is fixed
tests/test_config.py

@@ -115,10 +115,8 @@ def test_config_data_pipeline():
     print('Found config_dpath = {!r}'.format(config_dpath))

     # Only tests a representative subset of configurations
-    # TODO: test pipelines using Albu, current Albu throw None given empty GT
     config_names = [
         'mvxnet/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py',
-        'mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py',
         'mvxnet/'
         'faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py',
     ]
tests/test_forward.py

@@ -82,10 +82,10 @@ def _test_two_stage_forward(cfg_file):
             gt_masks=gt_masks,
             return_loss=True)
         assert isinstance(losses, dict)
-        from mmdet.apis.train import parse_losses
-        total_loss = parse_losses(losses)[0].requires_grad_(True)
-        assert float(total_loss.item()) > 0
-        total_loss.backward()
+        loss, _ = detector._parse_losses(losses)
+        loss.requires_grad_(True)
+        assert float(loss.item()) > 0
+        loss.backward()

         # Test forward train with an empty truth batch
         mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])

@@ -102,10 +102,9 @@ def _test_two_stage_forward(cfg_file):
             gt_masks=gt_masks,
             return_loss=True)
         assert isinstance(losses, dict)
-        from mmdet.apis.train import parse_losses
-        total_loss = parse_losses(losses)[0].requires_grad_(True)
-        assert float(total_loss.item()) > 0
-        total_loss.backward()
+        loss, _ = detector._parse_losses(losses)
+        assert float(loss.item()) > 0
+        loss.backward()

         # Test forward test
         with torch.no_grad():

@@ -140,6 +139,8 @@ def _test_single_stage_forward(cfg_file):
             gt_labels=gt_labels,
             return_loss=True)
         assert isinstance(losses, dict)
+        loss, _ = detector._parse_losses(losses)
+        assert float(loss.item()) > 0

         # Test forward test
         with torch.no_grad():
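The tests now rely on the detector's own _parse_losses (inherited from mmdet's BaseDetector) instead of the module-level helper that disappeared with mmdet3d/apis/train.py. A sketch of the contract the tests depend on; `detector` stands for any built detector instance (hypothetical here):

    import torch

    # _parse_losses sums every loss tensor in the dict and returns
    # (total_loss, log_vars) — mmdet 2.x BaseDetector behavior.
    losses = dict(loss_cls=torch.tensor(0.3), loss_bbox=torch.tensor(0.7))
    loss, log_vars = detector._parse_losses(losses)
    assert abs(float(loss.item()) - 1.0) < 1e-6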
tools/train.py

@@ -12,11 +12,10 @@ from mmcv import Config, DictAction
 from mmcv.runner import init_dist

 from mmdet3d import __version__
-from mmdet3d.apis import train_detector
 from mmdet3d.datasets import build_dataset
 from mmdet3d.models import build_detector
 from mmdet3d.utils import collect_env, get_root_logger
-from mmdet.apis import set_random_seed
+from mmdet.apis import set_random_seed, train_detector


 def parse_args():