OpenDAS / mmdetection3d / Commits

Commit b496f579
Authored Jul 18, 2022 by ZCMax; committed by ChaimZhu on Jul 20, 2022

[Refactor] Refactor Mono3D models

Parent: 35667791
Changes: 36
Showing 20 changed files with 1225 additions and 522 deletions (+1225, -522)
configs/_base_/datasets/kitti-mono3d.py (+55, -61)
configs/_base_/datasets/nus-mono3d.py (+74, -64)
configs/_base_/models/fcos3d.py (+15, -7)
configs/_base_/models/pgd.py (+6, -5)
configs/_base_/models/smoke.py (+11, -3)
configs/_base_/schedules/mmdet_schedule_1x.py (+21, -10)
configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py (+55, -47)
configs/pgd/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d.py (+60, -50)
configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py (+44, -38)
mmdet3d/datasets/__init__.py (+2, -1)
mmdet3d/datasets/convert_utils.py (+256, -0)
mmdet3d/datasets/det3d_dataset.py (+1, -0)
mmdet3d/datasets/nuscenes_dataset.py (+102, -6)
mmdet3d/datasets/pipelines/formating.py (+1, -1)
mmdet3d/datasets/pipelines/loading.py (+55, -3)
mmdet3d/datasets/pipelines/transforms_3d.py (+46, -44)
mmdet3d/metrics/kitti_metric.py (+2, -0)
mmdet3d/metrics/nuscenes_metric.py (+297, -25)
mmdet3d/models/data_preprocessors/data_preprocessor.py (+1, -2)
mmdet3d/models/dense_heads/anchor_free_mono3d_head.py (+121, -155)
configs/_base_/datasets/kitti-mono3d.py

-dataset_type = 'KittiMonoDataset'
+dataset_type = 'KittiDataset'
 data_root = 'data/kitti/'
 class_names = ['Pedestrian', 'Cyclist', 'Car']
 input_modality = dict(use_lidar=False, use_camera=True)
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+metainfo = dict(CLASSES=class_names)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/',
+        'data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
...
@@ -14,79 +27,60 @@ train_pipeline = [
         with_bbox_3d=True,
         with_label_3d=True,
         with_bbox_depth=True),
-    dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
+    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
-            'centers2d', 'depths'
+            'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1242, 375),
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
-]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-eval_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['img'])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train_mono3d.coco.json',
-        info_file=data_root + 'kitti_infos_train.pkl',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=train_pipeline,
-        modality=input_modality,
-        test_mode=False,
-        box_type_3d='Camera'),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
-        info_file=data_root + 'kitti_infos_val.pkl',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
-        info_file=data_root + 'kitti_infos_val.pkl',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'))
-evaluation = dict(interval=2)
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='kitti_infos_train.pkl',
+        data_prefix=dict(img='training/image_2'),
+        pipeline=train_pipeline,
+        modality=input_modality,
+        metainfo=metainfo,
+        test_mode=False,
+        # we use box_type_3d='Camera' in monocular 3d
+        # detection task
+        box_type_3d='Camera'))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(img='training/image_2'),
+        ann_file='kitti_infos_val.pkl',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='Camera'))
+test_dataloader = val_dataloader
+val_evaluator = dict(
+    type='KittiMetric',
+    ann_file=data_root + 'kitti_infos_val.pkl',
+    metric='bbox',
+    pred_box_type_3d='Camera')
+test_evaluator = val_evaluator
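
Note: the dataset settings above now follow the MMEngine convention, with explicit train_dataloader/val_dataloader dicts and a val_evaluator replacing the old data=dict(...) plus evaluation=dict(...). A minimal sketch of inspecting such a config, assuming an environment with mmengine installed (the build step itself is left to the runner):

# A minimal sketch, assuming mmengine is available.
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/kitti-mono3d.py')
# The old `data` / `evaluation` keys are gone; dataloaders and
# evaluators are now first-class top-level fields.
print(cfg.train_dataloader.batch_size)      # 2
print(cfg.val_dataloader.dataset.ann_file)  # 'kitti_infos_val.pkl'
print(cfg.val_evaluator.type)               # 'KittiMetric'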
configs/_base_/datasets/nus-mono3d.py

-dataset_type = 'NuScenesMonoDataset'
+dataset_type = 'NuScenesDataset'
 data_root = 'data/nuscenes/'
 class_names = [
     'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
     'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
 ]
+metainfo = dict(CLASSES=class_names)
-# Input modality for nuScenes dataset, this is consistent with the submission
-# format which requires the information in input_modality.
-input_modality = dict(
-    use_lidar=False,
-    use_camera=True,
-    use_radar=False,
-    use_map=False,
-    use_external=False)
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+input_modality = dict(use_lidar=False, use_camera=True)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/',
+        'data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
...
@@ -26,75 +34,77 @@ train_pipeline = [
         with_bbox_depth=True),
-    dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
        keys=[
             'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
-            'gt_labels_3d', 'centers2d', 'depths'
+            'gt_labels_3d', 'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        scale_factor=1.0,
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
-]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-eval_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['img'])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=train_pipeline,
-        modality=input_modality,
-        test_mode=False,
-        box_type_3d='Camera'),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'),
-    test=dict(
-        type=dataset_type,
-        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'))
-evaluation = dict(interval=2)
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            pts='',
+            CAM_FRONT='samples/CAM_FRONT',
+            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+            CAM_BACK='samples/CAM_BACK',
+            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
+        ann_file='nuscenes_infos_train.pkl',
+        task='mono3d',
+        pipeline=train_pipeline,
+        metainfo=metainfo,
+        modality=input_modality,
+        test_mode=False,
+        # we use box_type_3d='Camera' in monocular 3d
+        # detection task
+        box_type_3d='Camera',
+        use_valid_flag=True))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            pts='',
+            CAM_FRONT='samples/CAM_FRONT',
+            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+            CAM_BACK='samples/CAM_BACK',
+            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
+        ann_file='nuscenes_infos_val.pkl',
+        task='mono3d',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='Camera',
+        use_valid_flag=True))
+test_dataloader = val_dataloader
+val_evaluator = dict(
+    type='NuScenesMetric',
+    data_root=data_root,
+    ann_file=data_root + 'nuscenes_infos_val.pkl',
+    metric='bbox')
+test_evaluator = val_evaluator
configs/_base_/models/fcos3d.py

 # model settings
 model = dict(
     type='FCOSMono3D',
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        pad_size_divisor=32),
     backbone=dict(
-        type='ResNet',
+        type='mmdet.ResNet',
         depth=101,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
...
@@ -13,7 +20,7 @@ model = dict(
             type='Pretrained',
             checkpoint='open-mmlab://detectron2/resnet101_caffe')),
     neck=dict(
-        type='FPN',
+        type='mmdet.FPN',
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
...
@@ -45,18 +52,19 @@ model = dict(
         dir_branch=(256, ),
         attr_branch=(256, ),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_attr=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_centerness=dict(
-            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
         norm_on_bbox=True,
         centerness_on_reg=True,
...
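
Note: the recurring change in these model configs is the registry-scope prefix. Modules that live in MMDetection are now referenced as 'mmdet.ResNet', 'mmdet.FocalLoss', and so on, so the MMEngine registry can resolve them across repositories. A minimal sketch of how such a scoped type string is resolved, assuming a dev-1.x environment with mmdet3d and mmdet installed (the build call is shown for illustration):

# A minimal sketch, assuming mmdet3d's dev-1.x registries are available.
from mmdet3d.registry import MODELS

# The 'mmdet.' prefix tells the registry to look up 'FocalLoss' in
# MMDetection's MODELS registry rather than in mmdet3d's own.
loss_cfg = dict(
    type='mmdet.FocalLoss',
    use_sigmoid=True,
    gamma=2.0,
    alpha=0.25,
    loss_weight=1.0)
loss_cls = MODELS.build(loss_cfg)  # -> an mmdet FocalLoss instance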
configs/_base_/models/pgd.py

@@ -28,18 +28,19 @@ model = dict(
         dir_branch=(256, ),
         attr_branch=(256, ),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_attr=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_centerness=dict(
-            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         norm_on_bbox=True,
         centerness_on_reg=True,
         center_sampling=True,
...
configs/_base_/models/smoke.py

 # model settings
 model = dict(
     type='SMOKEMono3D',
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        pad_size_divisor=32),
     backbone=dict(
         type='DLANet',
         depth=34,
...
@@ -42,10 +49,11 @@ model = dict(
             base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58),
                        (3.88, 1.63, 1.53)),
             code_size=7),
-        loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0),
-        loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300),
+        loss_cls=dict(type='mmdet.GaussianFocalLoss', loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.L1Loss', reduction='sum', loss_weight=1 / 300),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_attr=None,
         conv_bias=True,
         dcn_on_last_conv=False),
...
configs/_base_/schedules/mmdet_schedule_1x.py

+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+# learning rate
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=12,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
 # optimizer
-optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-optimizer_config = dict(grad_clip=None)
-# learning policy
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=0.001,
-    step=[8, 11])
-runner = dict(type='EpochBasedRunner', max_epochs=12)
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
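
Note: the new param_scheduler pair reproduces the old warmup-plus-step policy: a linear ramp over the first 500 iterations followed by step decay at epochs 8 and 11. A minimal sketch of the same schedule in plain PyTorch, for readers mapping the old lr_config fields to the new ones (the model and the iterations-per-epoch count are illustrative assumptions):

# A minimal sketch in plain PyTorch; the model and iteration counts are
# illustrative assumptions, not values taken from this commit.
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LinearLR, MultiStepLR

model = torch.nn.Linear(4, 2)
optimizer = SGD(model.parameters(), lr=0.02, momentum=0.9,
                weight_decay=0.0001)

# warmup='linear', warmup_iters=500, warmup_ratio=0.001 (iteration-based)
warmup = LinearLR(optimizer, start_factor=0.001, total_iters=500)
# step=[8, 11], gamma=0.1 (epoch-based)
decay = MultiStepLR(optimizer, milestones=[8, 11], gamma=0.1)

for epoch in range(12):
    for it in range(1000):       # assumed iterations per epoch
        optimizer.step()
        if epoch == 0 and it < 500:
            warmup.step()        # per-iteration warmup
    decay.step()                 # per-epoch decay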
configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py

@@ -4,18 +4,31 @@ _base_ = [
 ]
 # model settings
 model = dict(
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[103.530, 116.280, 123.675],
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False,
+        pad_size_divisor=32),
     backbone=dict(
         dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, False, True, True)))
 class_names = [
     'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
     'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
 ]
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/',
+        'data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
...
@@ -24,52 +37,47 @@ train_pipeline = [
         with_bbox_3d=True,
         with_label_3d=True,
         with_bbox_depth=True),
-    dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
+    dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
-            'gt_labels_3d', 'centers2d', 'depths'
+            'gt_labels_3d', 'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        scale_factor=1.0,
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale_factor=1.0),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    dataset=dict(dataset=dict(pipeline=train_pipeline)))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # optimizer
-optimizer = dict(
-    lr=0.002, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
-optimizer_config = dict(
-    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=1.0 / 3,
-    step=[8, 11])
-total_epochs = 12
-evaluation = dict(interval=2)
+optim_wrapper = dict(
+    optimizer=dict(lr=0.002),
+    paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+    clip_grad=dict(max_norm=35, norm_type=2))
+# learning rate
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1.0 / 3, by_epoch=False, begin=0,
+        end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=12,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
configs/pgd/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d.py

@@ -4,6 +4,12 @@ _base_ = [
 ]
 # model settings
 model = dict(
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[103.530, 116.280, 123.675],
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False,
+        pad_size_divisor=32),
     backbone=dict(frozen_stages=0),
     neck=dict(start_level=0, num_outs=4),
     bbox_head=dict(
...
@@ -27,16 +33,17 @@ model = dict(
         ),
         centerness_branch=(256, ),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_centerness=dict(
-            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         use_depth_classifier=True,
         depth_branch=(256, ),
         depth_range=(0, 70),
...
@@ -61,11 +68,21 @@ model = dict(
         ]),
     test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))
 class_names = ['Pedestrian', 'Cyclist', 'Car']
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/',
+        'data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
...
@@ -74,54 +91,47 @@ train_pipeline = [
         with_bbox_3d=True,
         with_label_3d=True,
         with_bbox_depth=True),
-    dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
+    dict(type='mmdet.Resize', scale=(1242, 375), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
-            'centers2d', 'depths'
+            'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        scale_factor=1.0,
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale_factor=1.0),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=3,
-    workers_per_gpu=3,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    batch_size=3, num_workers=3, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # optimizer
-optimizer = dict(
-    lr=0.001, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
-optimizer_config = dict(
-    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=1.0 / 3,
-    step=[32, 44])
-total_epochs = 48
-runner = dict(type='EpochBasedRunner', max_epochs=48)
-evaluation = dict(interval=2)
-checkpoint_config = dict(interval=8)
+optim_wrapper = dict(
+    optimizer=dict(lr=0.01),
+    paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+    clip_grad=dict(max_norm=35, norm_type=2))
+# learning rate
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1.0 / 3, by_epoch=False, begin=0,
+        end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=48,
+        by_epoch=True,
+        milestones=[32, 44],
+        gamma=0.1)
+]
+train_cfg = dict(max_epochs=48)
configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py

@@ -3,21 +3,21 @@ _base_ = [
     '../_base_/default_runtime.py'
 ]
-# optimizer
-optimizer = dict(type='Adam', lr=2.5e-4)
-optimizer_config = dict(grad_clip=None)
-lr_config = dict(policy='step', warmup=None, step=[50])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=72)
-log_config = dict(interval=10)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/',
+        'data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/'
+    }))
 find_unused_parameters = True
 class_names = ['Pedestrian', 'Cyclist', 'Car']
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
...
@@ -29,36 +29,42 @@ train_pipeline = [
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
     dict(type='RandomShiftScale', shift_scale=(0.2, 0.4), aug_prob=0.3),
     dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
-            'centers2d', 'depths'
+            'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1280, 384),
-        flip=False,
-        transforms=[
-            dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    batch_size=8, num_workers=4, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=12,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
+# optimizer
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='Adam', lr=2.5e-4),
+    clip_grad=None)
mmdet3d/datasets/__init__.py

 # Copyright (c) OpenMMLab. All rights reserved.
 from .builder import DATASETS, PIPELINES, build_dataset
+from .convert_utils import get_2d_boxes
 from .dataset_wrappers import CBGSDataset
 from .det3d_dataset import Det3DDataset
 from .kitti_dataset import KittiDataset
...
@@ -41,5 +42,5 @@ __all__ = [
     'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
     'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
     'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
-    'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES'
+    'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES', 'get_2d_boxes'
 ]
mmdet3d/datasets/convert_utils.py (new file, mode 0 → 100644)

# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from typing import List, Tuple, Union

import numpy as np
from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box

from mmdet3d.core.bbox import points_cam2img

nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
                  'barrier')

nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
                  'pedestrian.moving', 'pedestrian.standing',
                  'pedestrian.sitting_lying_down', 'vehicle.moving',
                  'vehicle.parked', 'vehicle.stopped', 'None')

NameMapping = {
    'movable_object.barrier': 'barrier',
    'vehicle.bicycle': 'bicycle',
    'vehicle.bus.bendy': 'bus',
    'vehicle.bus.rigid': 'bus',
    'vehicle.car': 'car',
    'vehicle.construction': 'construction_vehicle',
    'vehicle.motorcycle': 'motorcycle',
    'human.pedestrian.adult': 'pedestrian',
    'human.pedestrian.child': 'pedestrian',
    'human.pedestrian.construction_worker': 'pedestrian',
    'human.pedestrian.police_officer': 'pedestrian',
    'movable_object.trafficcone': 'traffic_cone',
    'vehicle.trailer': 'trailer',
    'vehicle.truck': 'truck'
}


def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
    """Get the 2D annotation records for a given `sample_data_token`.

    Args:
        sample_data_token (str): Sample data token belonging to a camera
            keyframe.
        visibilities (list[str]): Visibility filter.

    Return:
        list[dict]: List of 2D annotation records that belong to the input
            `sample_data_token`.
    """
    # Get the sample data and the sample corresponding to that sample data.
    sd_rec = nusc.get('sample_data', sample_data_token)

    assert sd_rec['sensor_modality'] == 'camera', \
        'Error: get_2d_boxes only works for camera sample_data!'
    if not sd_rec['is_key_frame']:
        raise ValueError(
            'The 2D re-projections are available only for keyframes.')

    s_rec = nusc.get('sample', sd_rec['sample_token'])

    # Get the calibrated sensor and ego pose
    # record to get the transformation matrices.
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])

    # Get all the annotations with the specified visibilities.
    ann_recs = [
        nusc.get('sample_annotation', token) for token in s_rec['anns']
    ]
    ann_recs = [
        ann_rec for ann_rec in ann_recs
        if (ann_rec['visibility_token'] in visibilities)
    ]

    repro_recs = []

    for ann_rec in ann_recs:
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = ann_rec['token']
        ann_rec['sample_data_token'] = sample_data_token

        # Get the box in global coordinates.
        box = nusc.get_box(ann_rec['token'])

        # Move them to the ego-pose frame.
        box.translate(-np.array(pose_rec['translation']))
        box.rotate(Quaternion(pose_rec['rotation']).inverse)

        # Move them to the calibrated sensor frame.
        box.translate(-np.array(cs_rec['translation']))
        box.rotate(Quaternion(cs_rec['rotation']).inverse)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box.corners()
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        else:
            min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token, sd_rec['filename'])

        # if repro_rec is None, we do not append it into repro_recs
        if repro_rec is not None:
            loc = box.center.tolist()

            dim = box.wlh
            dim[[0, 1, 2]] = dim[[1, 2, 0]]  # convert wlh to our lhw
            dim = dim.tolist()

            rot = box.orientation.yaw_pitch_roll[0]
            rot = [-rot]  # convert the rot to our cam coordinate

            global_velo2d = nusc.box_velocity(box.token)[:2]
            global_velo3d = np.array([*global_velo2d, 0.0])
            e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
            c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
            cam_velo3d = global_velo3d @ np.linalg.inv(
                e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
            velo = cam_velo3d[0::2].tolist()

            repro_rec['bbox_3d'] = loc + dim + rot
            repro_rec['velocity'] = velo

            center_3d = np.array(loc).reshape([1, 3])
            center_2d_with_depth = points_cam2img(
                center_3d, camera_intrinsic, with_depth=True)
            center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
            repro_rec['center_2d'] = center_2d_with_depth[:2]
            repro_rec['depth'] = center_2d_with_depth[2]
            # normalized center2D + depth
            # samples with depth < 0 will be removed
            if repro_rec['depth'] <= 0:
                continue

            ann_token = nusc.get('sample_annotation',
                                 box.token)['attribute_tokens']
            if len(ann_token) == 0:
                attr_name = 'None'
            else:
                attr_name = nusc.get('attribute', ann_token[0])['name']
            attr_id = nus_attributes.index(attr_name)
            # repro_rec['attribute_name'] = attr_name
            repro_rec['attr_label'] = attr_id

            repro_recs.append(repro_rec)

    return repro_recs


def post_process_coords(
    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
    """Get the intersection of the convex hull of the reprojected bbox
    corners and the image canvas; return None if there is no intersection.

    Args:
        corner_coords (list[int]): Corner coordinates of the reprojected
            bounding box.
        imsize (tuple[int]): Size of the image canvas.

    Return:
        tuple[float]: Intersection of the convex hull of the 2D box
            corners and the image canvas.
    """
    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
    img_canvas = box(0, 0, imsize[0], imsize[1])

    if polygon_from_2d_box.intersects(img_canvas):
        img_intersection = polygon_from_2d_box.intersection(img_canvas)
        intersection_coords = np.array(
            [coord for coord in img_intersection.exterior.coords])

        min_x = min(intersection_coords[:, 0])
        min_y = min(intersection_coords[:, 1])
        max_x = max(intersection_coords[:, 0])
        max_y = max(intersection_coords[:, 1])

        return min_x, min_y, max_x, max_y
    else:
        return None


def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
                    sample_data_token: str, filename: str) -> OrderedDict:
    """Generate one 2D annotation record given various information on top of
    the 2D bounding box coordinates.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str): The corresponding image file where the annotation
            is present.

    Returns:
        dict: A sample mono3D annotation record.
            - bbox_label (int): 2d box label id
            - bbox_label_3d (int): 3d box label id
            - bbox (list[float]): left x, top y, right x, bottom y of 2d box
            - bbox_3d_isvalid (bool): whether the box is valid
    """
    repro_rec = OrderedDict()
    repro_rec['sample_data_token'] = sample_data_token
    coco_rec = dict()

    relevant_keys = [
        'attribute_tokens',
        'category_name',
        'instance_token',
        'next',
        'num_lidar_pts',
        'num_radar_pts',
        'prev',
        'sample_annotation_token',
        'sample_data_token',
        'visibility_token',
    ]

    for key, value in ann_rec.items():
        if key in relevant_keys:
            repro_rec[key] = value

    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename

    if repro_rec['category_name'] not in NameMapping:
        return None
    cat_name = NameMapping[repro_rec['category_name']]

    coco_rec['bbox_label'] = nus_categories.index(cat_name)
    coco_rec['bbox_label_3d'] = nus_categories.index(cat_name)
    coco_rec['bbox'] = [x1, y1, x2, y2]
    coco_rec['bbox_3d_isvalid'] = True

    return coco_rec
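
Note: a minimal usage sketch for the new converter, assuming the nuScenes devkit and the v1.0-mini split are available locally (the data root and the chosen sample are illustrative):

# A minimal sketch, assuming the nuScenes devkit and v1.0-mini are installed.
from nuscenes.nuscenes import NuScenes

from mmdet3d.datasets import get_2d_boxes

nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes')  # assumed path
sample = nusc.sample[0]
cam_token = sample['data']['CAM_FRONT']

# Keep annotations of every visibility level.
records = get_2d_boxes(nusc, cam_token, visibilities=['', '1', '2', '3', '4'])
print(len(records), records[0]['bbox'], records[0]['bbox_3d'])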
mmdet3d/datasets/det3d_dataset.py

@@ -197,6 +197,7 @@ class Det3DDataset(BaseDataset):
         ann_info = dict()
         for ann_name in keys:
             temp_anns = [item[ann_name] for item in instances]
+            # map the original dataset label to training label
             if 'label' in ann_name:
                 temp_anns = [
                     self.label_mapping[item] for item in temp_anns
...
mmdet3d/datasets/nuscenes_dataset.py

 # Copyright (c) OpenMMLab. All rights reserved.
+from os import path as osp
 from typing import Dict, List

 import numpy as np

+from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes
 from mmdet3d.registry import DATASETS
 from ..core.bbox import LiDARInstance3DBoxes
 from .det3d_dataset import Det3DDataset
...
@@ -53,6 +55,7 @@ class NuScenesDataset(Det3DDataset):
     def __init__(self,
                  data_root: str,
                  ann_file: str,
+                 task: str = '3d',
                  pipeline: List[dict] = None,
                  box_type_3d: str = 'LiDAR',
                  modality: Dict = dict(
...
@@ -66,7 +69,12 @@ class NuScenesDataset(Det3DDataset):
                  **kwargs):
         self.use_valid_flag = use_valid_flag
         self.with_velocity = with_velocity
-        assert box_type_3d.lower() == 'lidar'
+
+        # TODO: Redesign multi-view data process in the future
+        assert task in ('3d', 'mono3d', 'multi-view')
+        self.task = task
+
+        assert box_type_3d.lower() in ('lidar', 'camera')
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
...
@@ -97,6 +105,7 @@ class NuScenesDataset(Det3DDataset):
             anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
             anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
             return anns_results
+
         if self.use_valid_flag:
             mask = ann_info['bbox_3d_isvalid']
         else:
...
@@ -104,6 +113,22 @@ class NuScenesDataset(Det3DDataset):
         gt_bboxes_3d = ann_info['gt_bboxes_3d'][mask]
         gt_labels_3d = ann_info['gt_labels_3d'][mask]

+        if 'gt_bboxes' in ann_info:
+            gt_bboxes = ann_info['gt_bboxes'][mask]
+            gt_labels = ann_info['gt_labels'][mask]
+            attr_labels = ann_info['attr_labels'][mask]
+        else:
+            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
+            gt_labels = np.array([], dtype=np.int64)
+            attr_labels = np.array([], dtype=np.int64)
+
+        if 'centers_2d' in ann_info:
+            centers_2d = ann_info['centers_2d'][mask]
+            depths = ann_info['depths'][mask]
+        else:
+            centers_2d = np.zeros((0, 2), dtype=np.float32)
+            depths = np.zeros((0), dtype=np.float32)
+
         if self.with_velocity:
             gt_velocity = ann_info['velocity'][mask]
             nan_mask = np.isnan(gt_velocity[:, 0])
...
@@ -112,11 +137,82 @@ class NuScenesDataset(Det3DDataset):
         # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
         # the same as KITTI (0.5, 0.5, 0)
         # TODO: Unify the coordinates
-        gt_bboxes_3d = LiDARInstance3DBoxes(
-            gt_bboxes_3d,
-            box_dim=gt_bboxes_3d.shape[-1],
-            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
+        if self.task == 'mono3d':
+            gt_bboxes_3d = CameraInstance3DBoxes(
+                gt_bboxes_3d,
+                box_dim=gt_bboxes_3d.shape[-1],
+                origin=(0.5, 0.5, 0.5))
+        else:
+            gt_bboxes_3d = LiDARInstance3DBoxes(
+                gt_bboxes_3d,
+                box_dim=gt_bboxes_3d.shape[-1],
+                origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

-        anns_results = dict(
-            gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)
+        anns_results = dict(
+            gt_bboxes_3d=gt_bboxes_3d,
+            gt_labels_3d=gt_labels_3d,
+            gt_bboxes=gt_bboxes,
+            gt_labels=gt_labels,
+            attr_labels=attr_labels,
+            centers_2d=centers_2d,
+            depths=depths)
         return anns_results

+    def parse_data_info(self, info: dict) -> dict:
+        """Process the raw data info.
+
+        The only difference with it in `Det3DDataset`
+        is the specific process for `plane`.
+
+        Args:
+            info (dict): Raw info dict.
+
+        Returns:
+            dict: Has `ann_info` in training stage. And
+            all path has been converted to absolute path.
+        """
+        if self.task == 'mono3d':
+            data_list = []
+            if self.modality['use_lidar']:
+                info['lidar_points']['lidar_path'] = \
+                    osp.join(
+                        self.data_prefix.get('pts', ''),
+                        info['lidar_points']['lidar_path'])
+
+            if self.modality['use_camera']:
+                for cam_id, img_info in info['images'].items():
+                    if 'img_path' in img_info:
+                        if cam_id in self.data_prefix:
+                            cam_prefix = self.data_prefix[cam_id]
+                        else:
+                            cam_prefix = self.data_prefix.get('img', '')
+                        img_info['img_path'] = osp.join(
+                            cam_prefix, img_info['img_path'])
+
+            for idx, (cam_id, img_info) in enumerate(info['images'].items()):
+                camera_info = dict()
+                camera_info['images'] = dict()
+                camera_info['images'][cam_id] = img_info
+                if 'cam_instances' in info \
+                        and cam_id in info['cam_instances']:
+                    camera_info['instances'] = info['cam_instances'][cam_id]
+                else:
+                    camera_info['instances'] = []
+                # TODO: check whether to change sample_idx for 6 cameras
+                #  in one frame
+                camera_info['sample_idx'] = info['sample_idx'] * 6 + idx
+                camera_info['token'] = info['token']
+                camera_info['ego2global'] = info['ego2global']
+
+                if not self.test_mode:
+                    # used in training
+                    camera_info['ann_info'] = self.parse_ann_info(camera_info)
+                if self.test_mode and self.load_eval_anns:
+                    camera_info['eval_ann_info'] = \
+                        self.parse_ann_info(camera_info)
+                data_list.append(camera_info)
+            return data_list
+        else:
+            data_info = super().parse_data_info(info)
+            return data_info
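
Note: in mono3d mode, parse_data_info above fans one raw frame out into six per-camera samples and keeps them addressable through camera_info['sample_idx'] = info['sample_idx'] * 6 + idx. A tiny sketch of that index arithmetic and its inverse (the helper names are illustrative, not part of the commit):

# A minimal sketch of the frame <-> per-camera index mapping used above.
CAM_NUM = 6  # nuScenes surround cameras

def to_camera_sample_idx(frame_idx: int, cam_idx: int) -> int:
    # One dataset entry per camera: frame 0 -> 0..5, frame 1 -> 6..11, ...
    return frame_idx * CAM_NUM + cam_idx

def to_frame_and_cam(sample_idx: int) -> tuple:
    # Inverse mapping, used e.g. when merging per-camera detections
    # back into one frame (cf. `sample_id % CAM_NUM` in NuScenesMetric).
    return divmod(sample_idx, CAM_NUM)

assert to_camera_sample_idx(2, 3) == 15
assert to_frame_and_cam(15) == (2, 3)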
mmdet3d/datasets/pipelines/formating.py

@@ -122,7 +122,7 @@ class Pack3DDetInputs(BaseTransform):
         for key in [
                 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
                 'gt_labels_3d', 'attr_labels', 'pts_instance_mask',
-                'pts_semantic_mask', 'centers2d', 'depths'
+                'pts_semantic_mask', 'centers_2d', 'depths'
         ]:
             if key not in results:
                 continue
...
mmdet3d/datasets/pipelines/loading.py

@@ -86,7 +86,7 @@ class LoadImageFromFileMono3D(LoadImageFromFile):
     :class:`LoadImageFromFile`.
     """

-    def __call__(self, results):
+    def transform(self, results: dict) -> dict:
         """Call functions to load image and get image meta information.

         Args:
...
@@ -95,8 +95,32 @@ class LoadImageFromFileMono3D(LoadImageFromFile):
         Returns:
             dict: The dict contains loaded image and meta information.
         """
-        super().__call__(results)
-        results['cam2img'] = results['img_info']['cam_intrinsic']
+        # TODO: load different camera image from data info,
+        # for kitti dataset, we load 'CAM2' image.
+        # for nuscenes dataset, we load 'CAM_FRONT' image.
+        if 'CAM2' in results['images']:
+            filename = results['images']['CAM2']['img_path']
+            results['cam2img'] = results['images']['CAM2']['cam2img']
+        elif len(list(results['images'].keys())) == 1:
+            camera_type = list(results['images'].keys())[0]
+            filename = results['images'][camera_type]['img_path']
+            results['cam2img'] = results['images'][camera_type]['cam2img']
+        else:
+            raise NotImplementedError(
+                'Currently we only support load image from kitti and '
+                'nuscenes datasets')
+
+        img_bytes = self.file_client.get(filename)
+        img = mmcv.imfrombytes(
+            img_bytes, flag=self.color_type, backend=self.imdecode_backend)
+        if self.to_float32:
+            img = img.astype(np.float32)
+
+        results['img'] = img
+        results['img_shape'] = img.shape[:2]
+        results['ori_shape'] = img.shape[:2]

         return results
...
@@ -608,6 +632,34 @@ class LoadAnnotations3D(LoadAnnotations):
         self.with_seg_3d = with_seg_3d
         self.seg_3d_dtype = seg_3d_dtype

+    def _load_bboxes(self, results: dict) -> None:
+        """Private function to load bounding box annotations.
+
+        Rewrite `_load_bboxes` since mmdet3d uses `parse_ann_info` in
+        datasets.
+
+        Args:
+            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
+
+        Returns:
+            dict: The dict contains loaded bounding box annotations.
+        """
+        results['gt_bboxes'] = results['ann_info']['gt_bboxes']
+
+    def _load_labels(self, results: dict) -> None:
+        """Private function to load label annotations.
+
+        Rewrite `_load_labels` since mmdet3d uses `parse_ann_info` in
+        datasets.
+
+        Args:
+            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
+
+        Returns:
+            dict: The dict contains loaded label annotations.
+        """
+        results['gt_labels'] = results['ann_info']['gt_labels']
+
     def _load_bboxes_3d(self, results: dict) -> dict:
         """Private function to move the 3D bounding box annotation from
         `ann_info` field to the root of `results`.
...
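
Note: the rewritten transform() reads the image path and intrinsics from the new 'images' field instead of the old 'img_info'. A minimal sketch of the input dict it now expects for the KITTI case (the path and intrinsic values below are illustrative placeholders, not real calibration data):

# A minimal sketch of the `results` dict the new transform() consumes;
# the path and the matrix are illustrative, not real KITTI data.
results = {
    'images': {
        'CAM2': {
            'img_path': 'data/kitti/training/image_2/000000.png',
            'cam2img': [[700.0, 0.0, 600.0, 45.0],
                        [0.0, 700.0, 170.0, 0.0],
                        [0.0, 0.0, 1.0, 0.0],
                        [0.0, 0.0, 0.0, 1.0]],
        }
    }
}
# After LoadImageFromFileMono3D.transform(results):
#   results['img']       -> HxWx3 array decoded from img_path
#   results['cam2img']   -> the intrinsic matrix above
#   results['img_shape'] -> (H, W); results['ori_shape'] -> (H, W)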
mmdet3d/datasets/pipelines/transforms_3d.py

@@ -1579,7 +1579,7 @@ class VoxelBasedPointSampler(object):

 @TRANSFORMS.register_module()
-class AffineResize(object):
+class AffineResize(BaseTransform):
     """Get the affine transform matrices to the target size.

     Different from :class:`RandomAffine` in MMDetection, this class can
...
@@ -1596,13 +1596,16 @@ class AffineResize(object):
             outside the border of the image. Defaults to True.
     """

-    def __init__(self, img_scale, down_ratio, bbox_clip_border=True):
+    def __init__(self,
+                 img_scale: Tuple,
+                 down_ratio: int,
+                 bbox_clip_border: bool = True) -> None:

         self.img_scale = img_scale
         self.down_ratio = down_ratio
         self.bbox_clip_border = bbox_clip_border

-    def __call__(self, results):
+    def transform(self, results: dict) -> dict:
         """Call function to do affine transform to input image and labels.

         Args:
...
@@ -1647,30 +1650,29 @@ class AffineResize(object):
         results['pad_shape'] = img.shape
         results['trans_mat'] = trans_mat

-        self._affine_bboxes(results, trans_affine)
+        if 'gt_bboxes' in results:
+            self._affine_bboxes(results, trans_affine)

-        if 'centers2d' in results:
-            centers2d = self._affine_transform(results['centers2d'],
+        if 'centers_2d' in results:
+            centers2d = self._affine_transform(results['centers_2d'],
                                                trans_affine)
             valid_index = (centers2d[:, 0] > 0) & (
                 centers2d[:, 0] < self.img_scale[0]) & (
                     centers2d[:, 1] > 0) & (
                         centers2d[:, 1] < self.img_scale[1])
-            results['centers2d'] = centers2d[valid_index]
+            results['centers_2d'] = centers2d[valid_index]

-            for key in results.get('bbox_fields', []):
-                if key in ['gt_bboxes']:
-                    results[key] = results[key][valid_index]
+            if 'gt_bboxes' in results:
+                results['gt_bboxes'] = results['gt_bboxes'][valid_index]
                 if 'gt_labels' in results:
                     results['gt_labels'] = results['gt_labels'][valid_index]
                 if 'gt_masks' in results:
                     raise NotImplementedError(
                         'AffineResize only supports bbox.')

-            for key in results.get('bbox3d_fields', []):
-                if key in ['gt_bboxes_3d']:
-                    results[key].tensor = results[key].tensor[valid_index]
+            if 'gt_bboxes_3d' in results:
+                results['gt_bboxes_3d'].tensor = results[
+                    'gt_bboxes_3d'].tensor[valid_index]
                 if 'gt_labels_3d' in results:
                     results['gt_labels_3d'] = results['gt_labels_3d'][
                         valid_index]
...
@@ -1679,7 +1681,7 @@ class AffineResize(object):
         return results

-    def _affine_bboxes(self, results, matrix):
+    def _affine_bboxes(self, results: dict, matrix: np.ndarray) -> None:
         """Affine transform bboxes to input image.

         Args:
...
@@ -1689,20 +1691,18 @@ class AffineResize(object):
                 shape: (3, 3)
         """

-        for key in results.get('bbox_fields', []):
-            bboxes = results[key]
-            bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)
-            bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)
-            if self.bbox_clip_border:
-                bboxes[:, [0, 2]] = \
-                    bboxes[:, [0, 2]].clip(0, self.img_scale[0] - 1)
-                bboxes[:, [1, 3]] = \
-                    bboxes[:, [1, 3]].clip(0, self.img_scale[1] - 1)
-            results[key] = bboxes
+        bboxes = results['gt_bboxes']
+        bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)
+        bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)
+        if self.bbox_clip_border:
+            bboxes[:, [0, 2]] = \
+                bboxes[:, [0, 2]].clip(0, self.img_scale[0] - 1)
+            bboxes[:, [1, 3]] = \
+                bboxes[:, [1, 3]].clip(0, self.img_scale[1] - 1)
+        results['gt_bboxes'] = bboxes

-    def _affine_transform(self, points, matrix):
+    def _affine_transform(self, points: np.ndarray,
+                          matrix: np.ndarray) -> np.ndarray:
         """Affine transform bbox points to input image.

         Args:
...
@@ -1721,7 +1721,8 @@ class AffineResize(object):
         affined_points = np.matmul(matrix, hom_points_2d).T
         return affined_points[:, :2]

-    def _get_transform_matrix(self, center, scale, output_scale):
+    def _get_transform_matrix(self, center: Tuple, scale: Tuple,
+                              output_scale: Tuple[float]) -> np.ndarray:
         """Get affine transform matrix.

         Args:
...
@@ -1756,7 +1757,8 @@ class AffineResize(object):
         return matrix.astype(np.float32)

-    def _get_ref_point(self, ref_point1, ref_point2):
+    def _get_ref_point(self, ref_point1: np.ndarray,
+                       ref_point2: np.ndarray) -> np.ndarray:
         """Get reference point to calculate affine transform matrix.

         While using opencv to calculate the affine matrix, we need at least
...
@@ -1775,7 +1777,7 @@ class AffineResize(object):

 @TRANSFORMS.register_module()
-class RandomShiftScale(object):
+class RandomShiftScale(BaseTransform):
     """Random shift scale.

     Different from the normal shift and scale function, it doesn't
...
@@ -1788,12 +1790,12 @@ class RandomShiftScale(object):
         aug_prob (float): The shifting and scaling probability.
     """

-    def __init__(self, shift_scale, aug_prob):
+    def __init__(self, shift_scale: Tuple[float], aug_prob: float):

         self.shift_scale = shift_scale
         self.aug_prob = aug_prob

-    def __call__(self, results):
+    def transform(self, results: dict) -> dict:
         """Call function to record random shift and scale infos.

         Args:
...
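
Note: _affine_transform above applies a 3x3 matrix to 2D points by lifting them to homogeneous coordinates. A standalone sketch of the same math (the translation matrix is an arbitrary example, not taken from the commit):

# A minimal sketch of the homogeneous-coordinate math in _affine_transform;
# the matrix is an arbitrary translate-by-(10, 20) example.
import numpy as np

def affine_transform(points: np.ndarray, matrix: np.ndarray) -> np.ndarray:
    num_points = points.shape[0]
    # Append a 1 to each (x, y) so a single 3x3 matrix can rotate,
    # scale, and translate in one multiplication.
    hom_points_2d = np.concatenate(
        (points, np.ones((num_points, 1))), axis=1)  # (N, 3)
    hom_points_2d = hom_points_2d.T                  # (3, N)
    affined_points = np.matmul(matrix, hom_points_2d).T
    return affined_points[:, :2]

matrix = np.array([[1., 0., 10.],
                   [0., 1., 20.],
                   [0., 0., 1.]])
print(affine_transform(np.array([[0., 0.], [5., 5.]]), matrix))
# [[10. 20.]
#  [15. 25.]]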
mmdet3d/metrics/kitti_metric.py

@@ -45,6 +45,7 @@ class KittiMetric(BaseMetric):
     def __init__(self,
                  ann_file: str,
                  metric: Union[str, List[str]] = 'bbox',
+                 pred_box_type_3d: str = 'LiDAR',
                  pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
                  prefix: Optional[str] = None,
                  pklfile_prefix: str = None,
...
@@ -57,6 +58,7 @@ class KittiMetric(BaseMetric):
         self.ann_file = ann_file
         self.pklfile_prefix = pklfile_prefix
         self.submission_prefix = submission_prefix
+        self.pred_box_type_3d = pred_box_type_3d
         allowed_metrics = ['bbox', 'img_bbox', 'mAP']
         self.metrics = metric if isinstance(metric, list) else [metric]
...
mmdet3d/metrics/nuscenes_metric.py
View file @
b496f579
...
...
@@ -7,12 +7,15 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
import
mmcv
import
numpy
as
np
import
pyquaternion
import
torch
from
mmengine.evaluator
import
BaseMetric
from
mmengine.logging
import
MMLogger
from
nuscenes.eval.detection.config
import
config_factory
from
nuscenes.eval.detection.data_classes
import
DetectionConfig
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
mmdet3d.core
import
bbox3d2result
,
box3d_multiclass_nms
,
xywhr2xyxyr
from
mmdet3d.core.bbox
import
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
mmdet3d.registry
import
METRICS
...
...
@@ -288,17 +291,140 @@ class NuScenesMetric(BaseMetric):
for
name
in
results
[
0
]:
if
'pred'
in
name
and
'3d'
in
name
and
name
[
0
]
!=
'_'
:
# format result of model output in Det3dDataSample,
# include 'pred_instances_3d','pts_pred_instances_3d',
# 'img_pred_instances_3d'
print
(
f
'
\n
Formating bboxes of
{
name
}
'
)
results_
=
[
out
[
name
]
for
out
in
results
]
tmp_file_
=
osp
.
join
(
jsonfile_prefix
,
name
)
result_dict
[
name
]
=
self
.
_format_bbox
(
results_
,
sample_id_list
,
classes
,
tmp_file_
)
box_type_3d
=
type
(
results_
[
0
][
'bboxes_3d'
])
if
box_type_3d
==
LiDARInstance3DBoxes
:
result_dict
[
name
]
=
self
.
_format_lidar_bbox
(
results_
,
sample_id_list
,
classes
,
tmp_file_
)
elif
box_type_3d
==
CameraInstance3DBoxes
:
result_dict
[
name
]
=
self
.
_format_camera_bbox
(
results_
,
sample_id_list
,
classes
,
tmp_file_
)
return
result_dict
,
tmp_dir
def
_format_bbox
(
self
,
def
_format_camera_bbox
(
self
,
results
:
List
[
dict
],
sample_id_list
:
List
[
int
],
classes
:
List
[
str
]
=
None
,
jsonfile_prefix
:
str
=
None
)
->
str
:
"""Convert the results to the standard format.
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of the output jsonfile.
You can specify the output directory/filename by
modifying the jsonfile_prefix. Default: None.
Returns:
str: Path of the output json file.
"""
        nusc_annos = {}

        print('Start to convert detection format...')

        # Camera types in Nuscenes datasets
        camera_types = [
            'CAM_FRONT',
            'CAM_FRONT_RIGHT',
            'CAM_FRONT_LEFT',
            'CAM_BACK',
            'CAM_BACK_LEFT',
            'CAM_BACK_RIGHT',
        ]
        CAM_NUM = 6

        for i, det in enumerate(mmcv.track_iter_progress(results)):
            sample_id = sample_id_list[i]
            camera_type_id = sample_id % CAM_NUM

            if camera_type_id == 0:
                boxes_per_frame = []
                attrs_per_frame = []

            # need to merge results from images of the same sample
            annos = []
            boxes, attrs = output_to_nusc_box(det)
            sample_token = self.data_infos[sample_id]['token']
            camera_type = camera_types[camera_type_id]
            boxes, attrs = cam_nusc_box_to_global(
                self.data_infos[sample_id - camera_type_id], boxes, attrs,
                camera_type, classes, self.eval_detection_configs)
            boxes_per_frame.extend(boxes)
            attrs_per_frame.extend(attrs)
            # Remove redundant predictions caused by overlap of images
            if (sample_id + 1) % CAM_NUM != 0:
                continue
            boxes = global_nusc_box_to_cam(
                self.data_infos[sample_id + 1 - CAM_NUM], boxes_per_frame,
                classes, self.eval_detection_configs)
            cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
            # box nms 3d over 6 images in a frame
            # TODO: move this global setting into config
            nms_cfg = dict(
                use_rotate_nms=True,
                nms_across_levels=False,
                nms_pre=4096,
                nms_thr=0.05,
                score_thr=0.01,
                min_bbox_size=0,
                max_per_frame=500)
            from mmcv import Config
            nms_cfg = Config(nms_cfg)
            cam_boxes3d_for_nms = xywhr2xyxyr(cam_boxes3d.bev)
            boxes3d = cam_boxes3d.tensor
            # generate attr scores from attr labels
            attrs = labels.new_tensor([attr for attr in attrs_per_frame])
            boxes3d, scores, labels, attrs = box3d_multiclass_nms(
                boxes3d, cam_boxes3d_for_nms, scores, nms_cfg.score_thr,
                nms_cfg.max_per_frame, nms_cfg, mlvl_attr_scores=attrs)
            cam_boxes3d = CameraInstance3DBoxes(boxes3d, box_dim=9)
            det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
            boxes, attrs = output_to_nusc_box(det)
            # boxes are in the CAM_FRONT frame after global_nusc_box_to_cam,
            # so 'CAM_FRONT' is passed as the positional camera_type argument
            boxes, attrs = cam_nusc_box_to_global(
                self.data_infos[sample_id + 1 - CAM_NUM], boxes, attrs,
                'CAM_FRONT', classes, self.eval_detection_configs)

            for i, box in enumerate(boxes):
                name = classes[box.label]
                attr = self.get_attr_name(attrs[i], name)
                nusc_anno = dict(
                    sample_token=sample_token,
                    translation=box.center.tolist(),
                    size=box.wlh.tolist(),
                    rotation=box.orientation.elements.tolist(),
                    velocity=box.velocity[:2].tolist(),
                    detection_name=name,
                    detection_score=box.score,
                    attribute_name=attr)
                annos.append(nusc_anno)
            # other views results of the same frame should be concatenated
            if sample_token in nusc_annos:
                nusc_annos[sample_token].extend(annos)
            else:
                nusc_annos[sample_token] = annos

        nusc_submissions = {
            'meta': self.modality,
            'results': nusc_annos,
        }

        mmcv.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
        print('Results writes to', res_path)
        mmcv.dump(nusc_submissions, res_path)
        return res_path
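The merging above relies on mono3d results arriving in a fixed order, six camera images per nuScenes keyframe, which is what makes the `sample_id % CAM_NUM` bookkeeping work. A minimal, self-contained sketch of that index arithmetic (illustrative only; `group_by_frame` is a hypothetical helper, not part of this commit):

# Hypothetical illustration of the per-frame grouping used above.
CAM_NUM = 6

def group_by_frame(sample_ids):
    """Bucket flat per-image sample ids into per-frame lists of camera ids."""
    frames = {}
    for sample_id in sample_ids:
        frame_id = sample_id // CAM_NUM       # which keyframe
        camera_type_id = sample_id % CAM_NUM  # which of the six cameras
        frames.setdefault(frame_id, []).append(camera_type_id)
    return frames

print(group_by_frame(range(13)))
# {0: [0, 1, 2, 3, 4, 5], 1: [0, 1, 2, 3, 4, 5], 2: [0]}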
    def _format_lidar_bbox(self,
                           results: List[dict],
                           sample_id_list: List[int],
                           classes: List[str] = None,
                           ...
@@ -389,18 +515,26 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
    bbox3d = detection['bboxes_3d']
    scores = detection['scores_3d'].numpy()
    labels = detection['labels_3d'].numpy()
    attrs = None
    if 'attr_labels' in detection:
        attrs = detection['attr_labels'].numpy()

    box_gravity_center = bbox3d.gravity_center.numpy()
    box_dims = bbox3d.dims.numpy()
    box_yaw = bbox3d.yaw.numpy()

    box_list = []

    if type(bbox3d) == LiDARInstance3DBoxes:
        # our LiDAR coordinate system -> nuScenes box coordinate system
        nus_box_dims = box_dims[:, [1, 0, 2]]
-        box_list = []
        for i in range(len(bbox3d)):
            quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
            velocity = (*bbox3d.tensor[i, 7:9], 0.0)
            # velo_val = np.linalg.norm(box3d[i, 7:9])
            # velo_ori = box3d[i, 6]
            # velocity = (
            #     velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
            box = NuScenesBox(
                box_gravity_center[i],
                nus_box_dims[i],
                ...
@@ -409,7 +543,31 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
                score=scores[i],
                velocity=velocity)
            box_list.append(box)
-        return box_list
    elif type(bbox3d) == CameraInstance3DBoxes:
        # our Camera coordinate system -> nuScenes box coordinate system
        # convert the dim/rot to nuscbox convention
        nus_box_dims = box_dims[:, [2, 0, 1]]
        nus_box_yaw = -box_yaw
        for i in range(len(bbox3d)):
            q1 = pyquaternion.Quaternion(
                axis=[0, 0, 1], radians=nus_box_yaw[i])
            q2 = pyquaternion.Quaternion(axis=[1, 0, 0], radians=np.pi / 2)
            quat = q2 * q1
            velocity = (bbox3d.tensor[i, 7], 0.0, bbox3d.tensor[i, 8])
            box = NuScenesBox(
                box_gravity_center[i],
                nus_box_dims[i],
                quat,
                label=labels[i],
                score=scores[i],
                velocity=velocity)
            box_list.append(box)
    else:
        raise NotImplementedError(
            f'Do not support convert {type(bbox3d)} bboxes '
            'to standard NuScenesBoxes.')

    return box_list, attrs
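For the camera branch above, the box orientation composes two rotations: yaw about z (with the sign flipped relative to the camera-box convention), followed by a +pi/2 rotation about x into the nuScenes box frame. A small standalone sketch of that composition (assumes only numpy and pyquaternion are available; the yaw value is made up):

import numpy as np
import pyquaternion

yaw = 0.3  # example value standing in for nus_box_yaw[i]
q1 = pyquaternion.Quaternion(axis=[0, 0, 1], radians=yaw)
q2 = pyquaternion.Quaternion(axis=[1, 0, 0], radians=np.pi / 2)
quat = q2 * q1  # q1 is applied first, then q2
print(quat.rotation_matrix.round(3))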
def lidar_nusc_box_to_global(
    ...
@@ -448,3 +606,117 @@ def lidar_nusc_box_to_global(
        box.translate(ego2global[:3, 3])
        box_list.append(box)
    return box_list
def cam_nusc_box_to_global(
        info: dict, boxes: List[NuScenesBox], attrs: List[str],
        camera_type: str, classes: List[str],
        eval_configs: DetectionConfig) -> List[NuScenesBox]:
    """Convert the box from camera to global coordinate.

    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
        attrs (list[str]): List of attributes.
        camera_type (str): Type of camera.
        classes (list[str]): Mapped classes in the evaluation.
        eval_configs (object): Evaluation configuration object.

    Returns:
        list: List of standard NuScenesBoxes in the global coordinate.
    """
    box_list = []
    attr_list = []
    for (box, attr) in zip(boxes, attrs):
        # Move box to ego vehicle coord system
        cam2ego = np.array(info['images'][camera_type]['cam2ego'])
        box.rotate(
            pyquaternion.Quaternion(matrix=cam2ego, rtol=1e-05, atol=1e-07))
        box.translate(cam2ego[:3, 3])
        # filter det in ego.
        cls_range_map = eval_configs.class_range
        radius = np.linalg.norm(box.center[:2], 2)
        det_range = cls_range_map[classes[box.label]]
        if radius > det_range:
            continue
        # Move box to global coord system
        ego2global = np.array(info['ego2global'])
        box.rotate(
            pyquaternion.Quaternion(
                matrix=ego2global, rtol=1e-05, atol=1e-07))
        box.translate(ego2global[:3, 3])
        box_list.append(box)
        attr_list.append(attr)
    return box_list, attr_list
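Each rotate/translate pair above applies one rigid transform of the chain camera -> ego -> global. A numerical sketch of the same chain acting on a single point, with made-up calibration values (the real matrices come from info['images'][camera_type]['cam2ego'] and info['ego2global']):

import numpy as np

def transform_point(point, cam2ego, ego2global):
    p = np.append(point, 1.0)  # homogeneous coordinates
    return (ego2global @ (cam2ego @ p))[:3]

cam2ego = np.eye(4)
cam2ego[:3, 3] = [0.0, 0.0, 1.6]        # made-up camera mount height
ego2global = np.eye(4)
ego2global[:3, 3] = [100.0, 50.0, 0.0]  # made-up ego pose in the map
print(transform_point(np.array([1.0, 0.0, 10.0]), cam2ego, ego2global))
# [101.   50.   11.6]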
def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox],
                           classes: List[str],
                           eval_configs: DetectionConfig) -> List[NuScenesBox]:
    """Convert the box from global to camera coordinate.

    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
        classes (list[str]): Mapped classes in the evaluation.
        eval_configs (object): Evaluation configuration object.

    Returns:
        list: List of standard NuScenesBoxes in the camera coordinate.
    """
    box_list = []
    for box in boxes:
        # Move box to ego vehicle coord system
        ego2global = np.array(info['ego2global'])
        box.translate(-ego2global[:3, 3])
        box.rotate(
            pyquaternion.Quaternion(
                matrix=ego2global, rtol=1e-05, atol=1e-07).inverse)
        # filter det in ego.
        cls_range_map = eval_configs.class_range
        radius = np.linalg.norm(box.center[:2], 2)
        det_range = cls_range_map[classes[box.label]]
        if radius > det_range:
            continue
        # Move box to camera coord system
        cam2ego = np.array(info['images']['CAM_FRONT']['cam2ego'])
        box.translate(-cam2ego[:3, 3])
        box.rotate(
            pyquaternion.Quaternion(
                matrix=cam2ego, rtol=1e-05, atol=1e-07).inverse)
        box_list.append(box)
    return box_list
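The reverse direction undoes the translation before applying the inverse rotation, matching the translate()/rotate(.inverse) order in the loop above. An illustrative check (same made-up calibration as the previous sketch) that it inverts the forward chain:

import numpy as np

def point_global_to_cam(point, ego2global, cam2ego):
    def to_frame(p, mat):
        p = p - mat[:3, 3]                     # undo translation
        return np.linalg.inv(mat[:3, :3]) @ p  # undo rotation
    return to_frame(to_frame(point, ego2global), cam2ego)

ego2global = np.eye(4)
ego2global[:3, 3] = [100.0, 50.0, 0.0]
cam2ego = np.eye(4)
cam2ego[:3, 3] = [0.0, 0.0, 1.6]
print(point_global_to_cam(np.array([101.0, 50.0, 11.6]), ego2global, cam2ego))
# [ 1.  0. 10.]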
def nusc_box_to_cam_box3d(boxes: List[NuScenesBox]):
    """Convert boxes from :obj:`NuScenesBox` to :obj:`CameraInstance3DBoxes`.

    Args:
        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.

    Returns:
        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
            Converted 3D bounding boxes, scores and labels.
    """
    locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
    dims = torch.Tensor([b.wlh for b in boxes]).view(-1, 3)
    rots = torch.Tensor([b.orientation.yaw_pitch_roll[0]
                         for b in boxes]).view(-1, 1)
    velocity = torch.Tensor([b.velocity[0::2] for b in boxes]).view(-1, 2)

    # convert nusbox to cambox convention
    dims[:, [0, 1, 2]] = dims[:, [1, 2, 0]]
    rots = -rots

    boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda()
    cam_boxes3d = CameraInstance3DBoxes(
        boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5))
    scores = torch.Tensor([b.score for b in boxes]).cuda()
    labels = torch.LongTensor([b.label for b in boxes]).cuda()
    nms_scores = scores.new_zeros(scores.shape[0], 10 + 1)
    indices = labels.new_tensor(list(range(scores.shape[0])))
    nms_scores[indices, labels] = scores
    return cam_boxes3d, nms_scores, labels
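The last three lines scatter each box's scalar score into the (N, num_classes + 1) matrix expected by box3d_multiclass_nms, where the extra column is the background slot. A toy sketch of the trick:

import torch

scores = torch.tensor([0.9, 0.4, 0.7])
labels = torch.tensor([2, 0, 2])
num_classes = 10

# place each score in the column of its predicted label
nms_scores = scores.new_zeros(scores.shape[0], num_classes + 1)
nms_scores[torch.arange(scores.shape[0]), labels] = scores
print(nms_scores[:, 2])  # tensor([0.9000, 0.0000, 0.7000])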
mmdet3d/models/data_preprocessors/data_preprocessor.py
View file @
b496f579
@@ -106,8 +106,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
        if 'points' in inputs_dict[0].keys():
            points = [input['points'] for input in inputs_dict]
        else:
-            raise KeyError(
-                "Model input dict needs to include the 'points' key.")
            points = None

        if 'img' in inputs_dict[0].keys():
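This change relaxes the preprocessor for image-only models: a Mono3D batch carries no 'points' key, so the input now falls back to None instead of raising. A simplified stand-in for the new behaviour (function and field names here are illustrative, not the actual preprocessor API):

def collect_points(inputs_dict):
    """Return per-sample point clouds, or None for camera-only input."""
    if 'points' in inputs_dict[0]:
        return [sample['points'] for sample in inputs_dict]
    return None  # image-only input, e.g. FCOS3D / PGD / SMOKE

print(collect_points([{'img': 'image-tensor-placeholder'}]))  # None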
mmdet3d/models/dense_heads/anchor_free_mono3d_head.py
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
from abc import abstractmethod
from typing import Any, List, Sequence, Tuple, Union

import torch
from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init
-from mmcv.runner import force_fp32
from torch import Tensor
from torch import nn as nn

from mmdet3d.core.utils import ConfigType, InstanceList, OptConfigType
from mmdet3d.registry import MODELS
from mmdet.core import multi_apply
-from ..builder import build_loss
from .base_mono3d_dense_head import BaseMono3DDenseHead
@@ -20,39 +21,41 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
-        feat_channels (int, optional): Number of hidden channels.
        feat_channels (int): Number of hidden channels.
            Used in child classes. Defaults to 256.
-        stacked_convs (int, optional): Number of stacking convs of the head.
-        strides (tuple, optional): Downsample factor of each feature map.
-        dcn_on_last_conv (bool, optional): If true, use dcn in the last
        stacked_convs (int): Number of stacking convs of the head.
        strides (Sequence[int] or Sequence[Tuple[int, int]]): Downsample
            factor of each feature map.
        dcn_on_last_conv (bool): If true, use dcn in the last
            layer of towers. Default: False.
-        conv_bias (bool | str, optional): If specified as `auto`, it will be
        conv_bias (bool or str): If specified as `auto`, it will be
            decided by the norm_cfg. Bias of conv will be set as True
            if `norm_cfg` is None, otherwise False. Default: 'auto'.
-        background_label (int, optional): Label ID of background,
        background_label (bool, Optional): Label ID of background,
            set as 0 for RPN and num_classes for other heads.
            It will automatically set as `num_classes` if None is given.
-        use_direction_classifier (bool, optional):
        use_direction_classifier (bool):
            Whether to add a direction classifier.
-        diff_rad_by_sin (bool, optional): Whether to change the difference
        diff_rad_by_sin (bool): Whether to change the difference
            into sin difference for box regression loss. Defaults to True.
-        dir_offset (float, optional): Parameter used in direction
        dir_offset (float): Parameter used in direction
            classification. Defaults to 0.
-        dir_limit_offset (float, optional): Parameter used in direction
        dir_limit_offset (float): Parameter used in direction
            classification. Defaults to 0.
-        loss_cls (dict, optional): Config of classification loss.
-        loss_bbox (dict, optional): Config of localization loss.
-        loss_dir (dict, optional): Config of direction classifier loss.
-        loss_attr (dict, optional): Config of attribute classifier loss,
-            which is only active when `pred_attrs=True`.
-        bbox_code_size (int, optional): Dimensions of predicted bounding boxes.
-        pred_attrs (bool, optional): Whether to predict attributes.
        loss_cls (:obj:`ConfigDict` or dict): Config of classification loss.
        loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss.
        loss_dir (:obj:`ConfigDict` or dict): Config of direction classifier
            loss.
        loss_attr (:obj:`ConfigDict` or dict): Config of attribute classifier
            loss, which is only active when `pred_attrs=True`.
        bbox_code_size (int): Dimensions of predicted bounding boxes.
        pred_attrs (bool): Whether to predict attributes.
            Defaults to False.
-        num_attrs (int, optional): The number of attributes to be predicted.
        num_attrs (int): The number of attributes to be predicted.
            Default: 9.
-        pred_velo (bool, optional): Whether to predict velocity.
        pred_velo (bool): Whether to predict velocity.
            Defaults to False.
-        pred_bbox2d (bool, optional): Whether to predict 2D boxes.
        pred_bbox2d (bool): Whether to predict 2D boxes.
            Defaults to False.
        group_reg_dims (tuple[int], optional): The dimension of each regression
            target group. Default: (2, 1, 3, 1, 2).
@@ -66,68 +69,77 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                 (64, ),  # rot
                 ()  # velo
             ),
-        dir_branch (tuple[int], optional): Channels for direction
        dir_branch (Sequence[int]): Channels for direction
            classification branch. Default: (64, ).
-        attr_branch (tuple[int], optional): Channels for classification branch.
        attr_branch (Sequence[int]): Channels for classification branch.
            Default: (64, ).
-        conv_cfg (dict, optional): Config dict for convolution layer.
-            Default: None.
-        norm_cfg (dict, optional): Config dict for normalization layer.
-            Default: None.
-        train_cfg (dict, optional): Training config of anchor head.
-        test_cfg (dict, optional): Testing config of anchor head.
        conv_cfg (:obj:`ConfigDict` or dict, Optional): Config dict for
            convolution layer. Default: None.
        norm_cfg (:obj:`ConfigDict` or dict, Optional): Config dict for
            normalization layer. Default: None.
        train_cfg (:obj:`ConfigDict` or dict, Optional): Training config
            of anchor head.
        test_cfg (:obj:`ConfigDict` or dict, Optional): Testing config of
            anchor head.
        init_cfg (:obj:`ConfigDict` or dict or list[:obj:`ConfigDict` or \
            dict]): Initialization config dict.
    """  # noqa: W605

    _version = 1
    def __init__(self,
-                 num_classes,
-                 in_channels,
-                 feat_channels=256,
-                 stacked_convs=4,
-                 strides=(4, 8, 16, 32, 64),
-                 dcn_on_last_conv=False,
-                 conv_bias='auto',
-                 background_label=None,
-                 use_direction_classifier=True,
-                 diff_rad_by_sin=True,
-                 dir_offset=0,
-                 dir_limit_offset=0,
-                 loss_cls=dict(
-                     type='FocalLoss',
                 num_classes: int,
                 in_channels: int,
                 feat_channels: int = 256,
                 stacked_convs: int = 4,
                 strides: Sequence[int] = (4, 8, 16, 32, 64),
                 dcn_on_last_conv: bool = False,
                 conv_bias: Union[bool, str] = 'auto',
                 background_label: bool = None,
                 use_direction_classifier: bool = True,
                 diff_rad_by_sin: bool = True,
                 dir_offset: int = 0,
                 dir_limit_offset: int = 0,
                 loss_cls: ConfigType = dict(
                     type='mmdet.FocalLoss',
                     use_sigmoid=True,
                     gamma=2.0,
                     alpha=0.25,
                     loss_weight=1.0),
-                 loss_bbox=dict(
-                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-                 loss_dir=dict(
-                     type='CrossEntropyLoss', use_sigmoid=False,
-                     loss_weight=1.0),
-                 loss_attr=dict(
-                     type='CrossEntropyLoss', use_sigmoid=False,
-                     loss_weight=1.0),
-                 bbox_code_size=9,  # For nuscenes
-                 pred_attrs=False,
-                 num_attrs=9,  # For nuscenes
-                 pred_velo=False,
-                 pred_bbox2d=False,
-                 group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo,
-                 cls_branch=(128, 64),
-                 reg_branch=(
                 loss_bbox: ConfigType = dict(
                     type='mmdet.SmoothL1Loss',
                     beta=1.0 / 9.0,
                     loss_weight=1.0),
                 loss_dir: ConfigType = dict(
                     type='mmdet.CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0),
                 loss_attr: ConfigType = dict(
                     type='mmdet.CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0),
                 bbox_code_size: int = 9,  # For nuscenes
                 pred_attrs: bool = False,
                 num_attrs: int = 9,  # For nuscenes
                 pred_velo: bool = False,
                 pred_bbox2d: bool = False,
                 group_reg_dims: Sequence[int] = (2, 1, 3, 1, 2),  # offset, depth, size, rot, velo,
                 cls_branch: Sequence[int] = (128, 64),
                 reg_branch: Sequence[Tuple[int, int]] = (
                     (128, 64),  # offset
                     (128, 64),  # depth
                     (64, ),  # size
                     (64, ),  # rot
                     ()  # velo
                 ),
-                 dir_branch=(64, ),
-                 attr_branch=(64, ),
-                 conv_cfg=None,
-                 norm_cfg=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 init_cfg=None):
-        super(AnchorFreeMono3DHead, self).__init__(init_cfg=init_cfg)
                 dir_branch: Sequence[int] = (64, ),
                 attr_branch: Sequence[int] = (64, ),
                 conv_cfg: OptConfigType = None,
                 norm_cfg: OptConfigType = None,
                 train_cfg: OptConfigType = None,
                 test_cfg: OptConfigType = None,
                 init_cfg: OptConfigType = None) -> None:
        super().__init__(init_cfg=init_cfg)
        self.num_classes = num_classes
        self.cls_out_channels = num_classes
        self.in_channels = in_channels
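The typed signature relies on the ConfigType/OptConfigType aliases imported above. The actual definitions live in mmdet3d; the following simplified sketch only illustrates their likely shape:

# Illustrative approximation of the config-type aliases (not the source).
from typing import Optional, Union
from mmengine.config import ConfigDict

ConfigType = Union[ConfigDict, dict]
OptConfigType = Optional[ConfigType]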
@@ -141,9 +153,9 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
        self.diff_rad_by_sin = diff_rad_by_sin
        self.dir_offset = dir_offset
        self.dir_limit_offset = dir_limit_offset
-        self.loss_cls = build_loss(loss_cls)
-        self.loss_bbox = build_loss(loss_bbox)
-        self.loss_dir = build_loss(loss_dir)
        self.loss_cls = MODELS.build(loss_cls)
        self.loss_bbox = MODELS.build(loss_bbox)
        self.loss_dir = MODELS.build(loss_dir)
        self.bbox_code_size = bbox_code_size
        self.group_reg_dims = list(group_reg_dims)
        self.cls_branch = cls_branch
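MODELS.build resolves the loss class through the registry, and the 'mmdet.' scope prefix added to the config types directs the lookup to MMDetection's registry. A minimal sketch of building one of the default losses (assumes mmdet is installed so the scoped type can resolve):

from mmdet3d.registry import MODELS

loss_cls_cfg = dict(
    type='mmdet.FocalLoss',
    use_sigmoid=True,
    gamma=2.0,
    alpha=0.25,
    loss_weight=1.0)
loss_cls = MODELS.build(loss_cls_cfg)  # returns an nn.Module loss instance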
@@ -174,7 +186,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
        self.num_attrs = num_attrs
        if self.pred_attrs:
            self.attr_background_label = num_attrs
-            self.loss_attr = build_loss(loss_attr)
            self.loss_attr = MODELS.build(loss_attr)
            self.attr_branch = attr_branch

        self._init_layers()
@@ -316,11 +328,13 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
        if self.pred_attrs:
            normal_init(self.conv_attr, std=0.01, bias=bias_cls)

-    def forward(self, feats):
    def forward(
        self, x: Tuple[Tensor]
    ) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]:
        """Forward features from the upstream network.

        Args:
-            feats (tuple[Tensor]): Features from the upstream network, each is
            x (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.

        Returns:
@@ -339,9 +353,9 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                level, each is a 4D-tensor, the channel number is
                num_points * num_attrs.
        """
-        return multi_apply(self.forward_single, feats)[:5]
        return multi_apply(self.forward_single, x)[:5]

-    def forward_single(self, x):
    def forward_single(self, x: Tensor) -> Tuple[Tensor, ...]:
        """Forward features of a single scale level.

        Args:
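multi_apply maps forward_single over the per-level features and transposes the per-level result tuples into tuples of lists. A tiny sketch of that contract (a simplified reimplementation, not the mmdet source):

def multi_apply_sketch(func, *args):
    """Apply func to each set of per-level args, then transpose the results."""
    map_results = map(func, *args)
    return tuple(map(list, zip(*map_results)))

def square_and_double(v):
    return v * v, v * 2

print(multi_apply_sketch(square_and_double, [1, 2, 3]))
# ([1, 4, 9], [2, 4, 6])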
@@ -394,77 +408,8 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
        reg_feat

    @abstractmethod
-    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
-    def loss(self,
-             cls_scores,
-             bbox_preds,
-             dir_cls_preds,
-             attr_preds,
-             batch_gt_instances_3d,
-             batch_img_metas,
-             batch_gt_instances_ignore=None):
-        """Compute loss of the head.
-
-        Args:
-            cls_scores (list[Tensor]): Box scores for each scale level,
-                each is a 4D-tensor, the channel number is
-                num_points * num_classes.
-            bbox_preds (list[Tensor]): Box energies / deltas for each scale
-                level, each is a 4D-tensor, the channel number is
-                num_points * bbox_code_size.
-            dir_cls_preds (list[Tensor]): Box scores for direction class
-                predictions on each scale level, each is a 4D-tensor,
-                the channel number is num_points * 2. (bin = 2)
-            attr_preds (list[Tensor]): Box scores for each scale level,
-                each is a 4D-tensor, the channel number is
-                num_points * num_attrs.
-            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels3d``、``depths``、``centers2d`` and
-                attributes.
-            batch_img_metas (list[dict]): Meta information of each image,
-                e.g., image size, scaling factor, etc.
-            batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
-                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
-                data that is ignored during training and testing.
-                Defaults to None.
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
-    def get_results(self,
-                    cls_scores,
-                    bbox_preds,
-                    dir_cls_preds,
-                    attr_preds,
-                    batch_img_metas,
-                    cfg=None,
-                    rescale=None):
-        """Transform network output for a batch into bbox predictions.
-
-        Args:
-            cls_scores (list[Tensor]): Box scores for each scale level
-                Has shape (N, num_points * num_classes, H, W)
-            bbox_preds (list[Tensor]): Box energies / deltas for each scale
-                level with shape (N, num_points * bbox_code_size, H, W)
-            dir_cls_preds (list[Tensor]): Box scores for direction class
-                predictions on each scale level, each is a 4D-tensor,
-                the channel number is num_points * 2. (bin = 2)
-            attr_preds (list[Tensor]): Attribute scores for each scale level
-                Has shape (N, num_points * num_attrs, H, W)
-            batch_img_metas (list[dict]): Meta information of each image,
-                e.g., image size, scaling factor, etc.
-            cfg (mmcv.Config): Test / postprocessing configuration,
-                if None, test_cfg would be used
-            rescale (bool): If True, return boxes in original image space
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def get_targets(self, points, batch_gt_instances_3d):
    def get_targets(self, points: List[Tensor],
                    batch_gt_instances: InstanceList) -> Any:
        """Compute regression, classification and centerness targets for points
        in multiple images.
@@ -473,18 +418,32 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                (num_points, 2).
            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels3d``、``depths``、``centers2d`` and
-                attributes.
                、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d``
                and attributes.
        """
        raise NotImplementedError

    # TODO: Refactor using MlvlPointGenerator in MMDet.
    def _get_points_single(self,
-                           featmap_size,
-                           stride,
-                           dtype,
-                           device,
-                           flatten=False):
-        """Get points of a single scale level."""
                           featmap_size: Tuple[int],
                           stride: int,
                           dtype: torch.dtype,
                           device: torch.device,
                           flatten: bool = False) -> Tuple[Tensor, Tensor]:
        """Get points of a single scale level.

        Args:
            featmap_size (tuple[int]): Single scale level feature map
                size.
            stride (int): Downsample factor of the feature map.
            dtype (torch.dtype): Type of points.
            device (torch.device): Device of points.
            flatten (bool): Whether to flatten the tensor.
                Defaults to False.

        Returns:
            tuple: points of each image.
        """
        h, w = featmap_size
        x_range = torch.arange(w, dtype=dtype, device=device)
        y_range = torch.arange(h, dtype=dtype, device=device)
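The meshgrid step of _get_points_single falls outside this hunk. A self-contained sketch of what the method computes, assuming the usual torch.meshgrid construction for the elided part:

import torch

def points_single(featmap_size, dtype=torch.float32, device='cpu',
                  flatten=False):
    """Per-level (y, x) coordinate grids, optionally flattened."""
    h, w = featmap_size
    x_range = torch.arange(w, dtype=dtype, device=device)
    y_range = torch.arange(h, dtype=dtype, device=device)
    y, x = torch.meshgrid(y_range, x_range, indexing='ij')
    if flatten:
        y, x = y.flatten(), x.flatten()
    return y, x

y, x = points_single((2, 3), flatten=True)
print(x.tolist())  # [0.0, 1.0, 2.0, 0.0, 1.0, 2.0]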
@@ -494,16 +453,23 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
            x = x.flatten()
        return y, x

-    def get_points(self, featmap_sizes, dtype, device, flatten=False):
    # TODO: Refactor using MlvlPointGenerator in MMDet.
    def get_points(self,
                   featmap_sizes: List[Tuple[int]],
                   dtype: torch.dtype,
                   device: torch.device,
                   flatten: bool = False) -> List[Tuple[Tensor, Tensor]]:
        """Get points according to feature map sizes.

        Args:
            featmap_sizes (list[tuple]): Multi-level feature map sizes.
            dtype (torch.dtype): Type of points.
            device (torch.device): Device of points.
            flatten (bool): Whether to flatten the tensor.
                Defaults to False.

        Returns:
-            tuple: points of each image.
            list[tuple]: points of each image.
        """
        mlvl_points = []
        for i in range(len(featmap_sizes)):